Compare commits
1 Commits
v0.3.27
...
integer_da
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
034ffa93fa |
179
.cirrus.yml
179
.cirrus.yml
@@ -1,179 +0,0 @@
|
||||
macos_instance:
|
||||
image: ghcr.io/cirruslabs/macos-monterey-xcode:latest
|
||||
|
||||
#task:
|
||||
# name: AppleM1/LLVM
|
||||
# compile_script:
|
||||
# - brew install llvm
|
||||
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
|
||||
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
|
||||
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
|
||||
# - make TARGET=VORTEX USE_OPENMP=1 CC=clang
|
||||
|
||||
#task:
|
||||
# name: AppleM1/LLVM/ILP64
|
||||
# compile_script:
|
||||
# - brew install llvm
|
||||
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
|
||||
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
|
||||
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
|
||||
# - make TARGET=VORTEX USE_OPENMP=1 CC=clang INTERFACE64=1
|
||||
|
||||
#task:
|
||||
# name: AppleM1/LLVM/CMAKE
|
||||
# compile_script:
|
||||
# - brew install llvm
|
||||
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
|
||||
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
|
||||
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
|
||||
# - mkdir build
|
||||
# - cd build
|
||||
# - cmake -DTARGET=VORTEX -DCMAKE_C_COMPILER=clang -DBUILD_SHARED_LIBS=ON ..
|
||||
# - make -j 4
|
||||
|
||||
#task:
|
||||
# name: AppleM1/GCC/MAKE/OPENMP
|
||||
# compile_script:
|
||||
# - brew install gcc@11
|
||||
# - export PATH=/opt/homebrew/bin:$PATH
|
||||
# - export LDFLAGS="-L/opt/homebrew/lib"
|
||||
# - export CPPFLAGS="-I/opt/homebrew/include"
|
||||
# - make CC=gcc-11 FC=gfortran-11 USE_OPENMP=1
|
||||
|
||||
macos_instance:
|
||||
image: ghcr.io/cirruslabs/macos-monterey-xcode:latest
|
||||
task:
|
||||
name: AppleM1/LLVM x86_64 xbuild
|
||||
compile_script:
|
||||
- #brew install llvm
|
||||
- export #PATH=/opt/homebrew/opt/llvm/bin:$PATH
|
||||
- export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
|
||||
- export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
|
||||
- export ARCHS="i386 x86_64"
|
||||
- export ARCHS_STANDARD="i386 x86_64"
|
||||
- export ARCHS_STANDARD_32_64_BIT="i386 x86_64"
|
||||
- export ARCHS_STANDARD_64_BIT=x86_64
|
||||
- export ARCHS_STANDARD_INCLUDING_64_BIT="i386 x86_64"
|
||||
- export ARCHS_UNIVERSAL_IPHONE_OS="i386 x86_64"
|
||||
- export VALID_ARCHS="i386 x86_64"
|
||||
- xcrun --sdk macosx --show-sdk-path
|
||||
- xcodebuild -version
|
||||
- export CC=/Applications/Xcode-15.3.0.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode-15.3.0.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.4.sdk -arch x86_64"
|
||||
- make TARGET=CORE2 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l"
|
||||
always:
|
||||
config_artifacts:
|
||||
path: "*conf*"
|
||||
type: text/plain
|
||||
# lib_artifacts:
|
||||
# path: "libopenblas*"
|
||||
# type: application/octet-streamm
|
||||
|
||||
macos_instance:
|
||||
image: ghcr.io/cirruslabs/macos-monterey-xcode:latest
|
||||
task:
|
||||
name: AppleM1/LLVM armv8-ios xbuild
|
||||
compile_script:
|
||||
- #brew install llvm
|
||||
- export #PATH=/opt/homebrew/opt/llvm/bin:$PATH
|
||||
- export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
|
||||
- export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
|
||||
- export CC=/Applications/Xcode-15.3.0.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode-15.3.0.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS17.4.sdk -arch arm64 -miphoneos-version-min=10.0"
|
||||
- make TARGET=ARMV8 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 CROSS=1
|
||||
always:
|
||||
config_artifacts:
|
||||
path: "*conf*"
|
||||
type: text/plain
|
||||
|
||||
macos_instance:
|
||||
image: ghcr.io/cirruslabs/macos-monterey-xcode:latest
|
||||
task:
|
||||
name: AppleM1/LLVM armv7-androidndk xbuild
|
||||
compile_script:
|
||||
- brew install android-ndk
|
||||
- export #PATH=/opt/homebrew/opt/llvm/bin:$PATH
|
||||
- export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
|
||||
- export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
|
||||
- ls /System/Volumes/Data/opt/homebrew
|
||||
- ls -l /System/Volumes/Data/opt/homebrew/Caskroom/
|
||||
- find /System/Volumes/Data/opt/homebrew -name "armv7a-linux-androideabi*-ranlib"
|
||||
- #export CC=/Applications/Xcode-13.4.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||
- #export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode-13.4.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS16.0.sdk -arch arm64 -miphoneos-version-min=10.0"
|
||||
- export CC=/System/Volumes/Data/opt/homebrew/Caskroom/android-ndk/26c/AndroidNDK*.app/Contents/NDK/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi23-clang
|
||||
- make TARGET=ARMV7 ARM_SOFTFP_ABI=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l"
|
||||
always:
|
||||
config_artifacts:
|
||||
path: "*conf*"
|
||||
type: text/plain
|
||||
|
||||
task:
|
||||
name: NeoverseN1
|
||||
arm_container:
|
||||
image: node:latest
|
||||
compile_script:
|
||||
- make
|
||||
|
||||
task:
|
||||
name: NeoverseN1-ILP64
|
||||
arm_container:
|
||||
image: node:latest
|
||||
compile_script:
|
||||
- make INTERFACE64=1
|
||||
|
||||
task:
|
||||
name: NeoverseN1-OMP
|
||||
arm_container:
|
||||
image: node:latest
|
||||
cpu: 8
|
||||
compile_script:
|
||||
- make USE_OPENMP=1
|
||||
|
||||
FreeBSD_task:
|
||||
name: FreeBSD-gcc12
|
||||
freebsd_instance:
|
||||
image_family: freebsd-13-2
|
||||
install_script:
|
||||
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
|
||||
compile_script:
|
||||
- ls -l /usr/local/lib
|
||||
- gmake CC=gcc
|
||||
|
||||
|
||||
FreeBSD_task:
|
||||
name: freebsd-gcc12-ilp64
|
||||
freebsd_instance:
|
||||
image_family: freebsd-13-2
|
||||
install_script:
|
||||
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
|
||||
compile_script:
|
||||
- ls -l /usr/local/lib
|
||||
- gmake CC=gcc INTERFACE64=1
|
||||
|
||||
FreeBSD_task:
|
||||
name: FreeBSD-clang-openmp
|
||||
freebsd_instance:
|
||||
image_family: freebsd-13-2
|
||||
install_script:
|
||||
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
|
||||
- ln -s /usr/local/lib/gcc12/libgfortran.so.5.0.0 /usr/lib/libgfortran.so
|
||||
compile_script:
|
||||
- gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1
|
||||
|
||||
#task:
|
||||
# name: Windows/LLVM16 --- too slow ---
|
||||
# windows_container:
|
||||
# image: cirrusci/windowsservercore:cmake-2021.12.07
|
||||
# install_script:
|
||||
# - choco list --localonly
|
||||
# - choco install -y llvm
|
||||
# - # choco install -y cmake --installargs '"ADD_CMAKE_TO_PATH=System"'
|
||||
# - choco install -y ninja
|
||||
# - refreshenv
|
||||
# - cd "c:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Auxiliary/Build"
|
||||
# - vcvarsall x64
|
||||
# - cd "C:\Users\ContainerAdministrator\AppData\Local\Temp\cirrus-ci-build"
|
||||
# - cmake -S . -B build -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release
|
||||
# - cd build
|
||||
# - cmake --build .
|
||||
# - ctest
|
||||
16
.cirun.yml
16
.cirun.yml
@@ -1,16 +0,0 @@
|
||||
# Self-Hosted Github Action Runners on AWS via Cirun.io
|
||||
# Reference: https://docs.cirun.io/reference/yaml
|
||||
runners:
|
||||
- name: "aws-runner-graviton"
|
||||
# Cloud Provider: AWS
|
||||
cloud: "aws"
|
||||
region: "us-east-1"
|
||||
# Cheapest VM on AWS
|
||||
instance_type: "c7g.large"
|
||||
# Ubuntu-22.04, ami image
|
||||
machine_image: "ami-0a0c8eebcdd6dcbd0"
|
||||
preemptible: false
|
||||
# Add this label in the "runs-on" param in .github/workflows/<workflow-name>.yml
|
||||
# So that this runner is created for running the workflow
|
||||
labels:
|
||||
- "cirun-aws-runner-graviton"
|
||||
216
.drone.yml
216
.drone.yml
@@ -1,216 +0,0 @@
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_gcc_make
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: gcc
|
||||
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran perl
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm32_gcc_make
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: gcc
|
||||
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV6 NUM_THREADS=32'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran perl
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_clang_make
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: clang
|
||||
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran perl
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm32_clang_cmake
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: clang
|
||||
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV6 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON'
|
||||
commands:
|
||||
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC g++ perl cmake
|
||||
- $CC --version
|
||||
- mkdir build && cd build
|
||||
- cmake $CMAKE_FLAGS ..
|
||||
- make -j
|
||||
- ctest -V
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_gcc_cmake
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: gcc
|
||||
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV8 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON'
|
||||
commands:
|
||||
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC g++ perl cmake
|
||||
- $CC --version
|
||||
- mkdir build && cd build
|
||||
- cmake $CMAKE_FLAGS ..
|
||||
- make -j
|
||||
- ctest -V
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_clang_cmake
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: clang
|
||||
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV8 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON'
|
||||
commands:
|
||||
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC g++ perl cmake
|
||||
- $CC --version
|
||||
- mkdir build && cd build
|
||||
- cmake $CMAKE_FLAGS ..
|
||||
- make -j
|
||||
- ctest -V
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_native_test
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: gcc
|
||||
COMMON_FLAGS: 'USE_OPENMP=1'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran perl python g++
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
- make -C cpp_thread_test dgemm_tester
|
||||
---
|
||||
kind: pipeline
|
||||
name: epyc_native_test
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: amd64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: gcc
|
||||
COMMON_FLAGS: 'USE_OPENMP=1'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran perl python g++
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
- make -C cpp_thread_test dgemm_tester
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_gcc10
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:20.04
|
||||
environment:
|
||||
CC: gcc-10
|
||||
FC: gfortran-10
|
||||
COMMON_FLAGS: 'TARGET=ARMV8 DYNAMIC_ARCH=1'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran-10 perl python g++
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
|
||||
149
.github/workflows/apple_m.yml
vendored
149
.github/workflows/apple_m.yml
vendored
@@ -1,149 +0,0 @@
|
||||
name: apple m
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: macos-14
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build: [cmake, make]
|
||||
fortran: [gfortran]
|
||||
openmp: [0, 1]
|
||||
ilp64: [0, 1]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Print system information
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
cat /proc/cpuinfo
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
sysctl -a | grep machdep.cpu
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
sudo apt-get install -y gfortran cmake ccache libtinfo5
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
|
||||
brew reinstall gcc
|
||||
brew install coreutils cmake ccache
|
||||
brew install llvm
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
# We include the commit sha in the cache key, as new cache entries are
|
||||
# only created if there is no existing entry for the key yet.
|
||||
# GNU make and cmake call the compilers differently. It looks like
|
||||
# that causes the cache to mismatch. Keep the ccache for both build
|
||||
# tools separate to avoid polluting each other.
|
||||
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
|
||||
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
if [ "${{ matrix.build }}" = "make" ]; then
|
||||
# Add ccache to path
|
||||
if [ "$RUNNER_OS" = "Linux" ]; then
|
||||
echo "/usr/lib/ccache" >> $GITHUB_PATH
|
||||
elif [ "$RUNNER_OS" = "macOS" ]; then
|
||||
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
|
||||
echo "/opt/homebrew/opt/llvm/bin" >>$GITHUB_PATH
|
||||
echo "" >>$GITHUB_PATH
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: |
|
||||
export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
|
||||
export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
|
||||
export CC="/opt/homebrew/opt/llvm/bin/clang"
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=${{matrix.openmp}} INTERFACE64=${{matrix.ilp64}} FC="ccache ${{ matrix.fortran }}"
|
||||
;;
|
||||
"cmake")
|
||||
export LDFLAGS="$LDFLAGS -Wl,-ld_classic"
|
||||
mkdir build && cd build
|
||||
cmake -DDYNAMIC_ARCH=1 \
|
||||
-DUSE_OPENMP=${{matrix.openmp}} \
|
||||
-DINTERFACE64=${{matrix.ilp64}} \
|
||||
-DNOFORTRAN=0 \
|
||||
-DBUILD_WITHOUT_LAPACK=0 \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
|
||||
..
|
||||
cmake --build .
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Show ccache status
|
||||
continue-on-error: true
|
||||
run: ccache -s
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
|
||||
echo "::group::Tests in 'test' directory"
|
||||
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'ctest' directory"
|
||||
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'utest' directory"
|
||||
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
;;
|
||||
"cmake")
|
||||
cd build && ctest
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
139
.github/workflows/arm64_graviton.yml
vendored
139
.github/workflows/arm64_graviton.yml
vendored
@@ -1,139 +0,0 @@
|
||||
name: arm64 graviton cirun
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- develop
|
||||
- release-**
|
||||
pull_request:
|
||||
branches:
|
||||
- develop
|
||||
- release-**
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}"
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
fortran: [gfortran]
|
||||
build: [cmake, make]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Print system information
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
cat /proc/cpuinfo
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
sudo apt update
|
||||
sudo apt-get install -y gfortran cmake ccache libtinfo5
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
# We include the commit sha in the cache key, as new cache entries are
|
||||
# only created if there is no existing entry for the key yet.
|
||||
# GNU make and cmake call the compilers differently. It looks like
|
||||
# that causes the cache to mismatch. Keep the ccache for both build
|
||||
# tools separate to avoid polluting each other.
|
||||
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
|
||||
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
if [ "${{ matrix.build }}" = "make" ]; then
|
||||
# Add ccache to path
|
||||
if [ "$RUNNER_OS" = "Linux" ]; then
|
||||
echo "/usr/lib/ccache" >> $GITHUB_PATH
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: |
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
|
||||
;;
|
||||
"cmake")
|
||||
mkdir build && cd build
|
||||
cmake -DDYNAMIC_ARCH=1 \
|
||||
-DNOFORTRAN=0 \
|
||||
-DBUILD_WITHOUT_LAPACK=0 \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
|
||||
..
|
||||
cmake --build .
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Show ccache status
|
||||
continue-on-error: true
|
||||
run: ccache -s
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
|
||||
echo "::group::Tests in 'test' directory"
|
||||
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'ctest' directory"
|
||||
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'utest' directory"
|
||||
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
;;
|
||||
"cmake")
|
||||
cd build && ctest
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
126
.github/workflows/c910v.yml
vendored
126
.github/workflows/c910v.yml
vendored
@@ -1,126 +0,0 @@
|
||||
name: c910v qemu test
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
TEST:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1698113812618
|
||||
toolchain_file_name: Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0-20231018.tar.gz
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: RISCV64_GENERIC
|
||||
triple: riscv64-linux-gnu
|
||||
apt_triple: riscv64-linux-gnu
|
||||
opts: NO_SHARED=1 TARGET=RISCV64_GENERIC
|
||||
- target: C910V
|
||||
triple: riscv64-unknown-linux-gnu
|
||||
apt_triple: riscv64-linux-gnu
|
||||
opts: NO_SHARED=1 TARGET=C910V
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: install build deps
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
|
||||
gcc-${{ matrix.apt_triple }} gfortran-${{ matrix.apt_triple }} libgomp1-riscv64-cross
|
||||
|
||||
- name: checkout qemu
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: T-head-Semi/qemu
|
||||
path: qemu
|
||||
ref: 1e692ebb43d396c52352406323fc782c1ac99a42
|
||||
|
||||
- name: build qemu
|
||||
run: |
|
||||
# Force use c910v qemu-user
|
||||
wget https://github.com/revyos/qemu/commit/5164bca5a4bcde4534dc1a9aa3a7f619719874cf.patch
|
||||
cd qemu
|
||||
patch -p1 < ../5164bca5a4bcde4534dc1a9aa3a7f619719874cf.patch
|
||||
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=riscv64-linux-user --disable-system
|
||||
make -j$(nproc)
|
||||
make install
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: build OpenBLAS
|
||||
run: |
|
||||
wget ${xuetie_toolchain}/${toolchain_file_name}
|
||||
tar -xvf ${toolchain_file_name} -C /opt
|
||||
export PATH="/opt/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0/bin:$PATH"
|
||||
|
||||
make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)
|
||||
|
||||
- name: test
|
||||
run: |
|
||||
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
|
||||
qemu-riscv64 ./utest/openblas_utest
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat2 < ./ctest/sin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat2 < ./ctest/din2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat2 < ./ctest/cin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat2 < ./ctest/zin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat3 < ./ctest/sin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat3 < ./ctest/din3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat3 < ./ctest/cin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat3 < ./ctest/zin3
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat1
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat
|
||||
371
.github/workflows/dynamic_arch.yml
vendored
371
.github/workflows/dynamic_arch.yml
vendored
@@ -1,371 +0,0 @@
|
||||
name: continuous build
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest]
|
||||
fortran: [gfortran, flang]
|
||||
build: [cmake, make]
|
||||
exclude:
|
||||
- os: macos-latest
|
||||
fortran: flang
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Print system information
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
cat /proc/cpuinfo
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
sysctl -a | grep machdep.cpu
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y gfortran cmake ccache libtinfo5
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
|
||||
brew reinstall gcc
|
||||
brew install coreutils cmake ccache
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
# We include the commit sha in the cache key, as new cache entries are
|
||||
# only created if there is no existing entry for the key yet.
|
||||
# GNU make and cmake call the compilers differently. It looks like
|
||||
# that causes the cache to mismatch. Keep the ccache for both build
|
||||
# tools separate to avoid polluting each other.
|
||||
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
|
||||
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
if [ "${{ matrix.build }}" = "make" ]; then
|
||||
# Add ccache to path
|
||||
if [ "$RUNNER_OS" = "Linux" ]; then
|
||||
echo "/usr/lib/ccache" >> $GITHUB_PATH
|
||||
elif [ "$RUNNER_OS" = "macOS" ]; then
|
||||
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: |
|
||||
if [ "${{ matrix.fortran }}" = "flang" ]; then
|
||||
# download and install classic flang
|
||||
cd /usr/
|
||||
sudo wget -nv https://github.com/flang-compiler/flang/releases/download/flang_20190329/flang-20190329-x86-70.tgz
|
||||
sudo tar xf flang-20190329-x86-70.tgz
|
||||
sudo rm flang-20190329-x86-70.tgz
|
||||
cd -
|
||||
fi
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
|
||||
;;
|
||||
"cmake")
|
||||
mkdir build && cd build
|
||||
cmake -DDYNAMIC_ARCH=1 \
|
||||
-DNOFORTRAN=0 \
|
||||
-DBUILD_WITHOUT_LAPACK=0 \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
|
||||
..
|
||||
cmake --build .
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Show ccache status
|
||||
continue-on-error: true
|
||||
run: ccache -s
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
|
||||
echo "::group::Tests in 'test' directory"
|
||||
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'ctest' directory"
|
||||
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'utest' directory"
|
||||
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
;;
|
||||
"cmake")
|
||||
cd build && ctest
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
msys2:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: windows-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
msystem: [UCRT64, MINGW32, CLANG64, CLANG32]
|
||||
idx: [int32, int64]
|
||||
build-type: [Release]
|
||||
include:
|
||||
- msystem: UCRT64
|
||||
idx: int32
|
||||
target-prefix: mingw-w64-ucrt-x86_64
|
||||
fc-pkg: fc
|
||||
- msystem: MINGW32
|
||||
idx: int32
|
||||
target-prefix: mingw-w64-i686
|
||||
fc-pkg: fc
|
||||
- msystem: CLANG64
|
||||
idx: int32
|
||||
target-prefix: mingw-w64-clang-x86_64
|
||||
fc-pkg: fc
|
||||
# Compiling with Flang 16 seems to cause test errors on machines
|
||||
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
|
||||
no-avx512-flags: -DNO_AVX512=1
|
||||
- msystem: CLANG32
|
||||
idx: int32
|
||||
target-prefix: mingw-w64-clang-i686
|
||||
fc-pkg: cc
|
||||
c-lapack-flags: -DC_LAPACK=ON
|
||||
- msystem: UCRT64
|
||||
idx: int64
|
||||
idx64-flags: -DBINARY=64 -DINTERFACE64=1
|
||||
target-prefix: mingw-w64-ucrt-x86_64
|
||||
fc-pkg: fc
|
||||
- msystem: CLANG64
|
||||
idx: int64
|
||||
idx64-flags: -DBINARY=64 -DINTERFACE64=1
|
||||
target-prefix: mingw-w64-clang-x86_64
|
||||
fc-pkg: fc
|
||||
# Compiling with Flang 16 seems to cause test errors on machines
|
||||
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
|
||||
no-avx512-flags: -DNO_AVX512=1
|
||||
- msystem: UCRT64
|
||||
idx: int32
|
||||
target-prefix: mingw-w64-ucrt-x86_64
|
||||
fc-pkg: fc
|
||||
build-type: None
|
||||
exclude:
|
||||
- msystem: MINGW32
|
||||
idx: int64
|
||||
- msystem: CLANG32
|
||||
idx: int64
|
||||
|
||||
defaults:
|
||||
run:
|
||||
# Use MSYS2 bash as default shell
|
||||
shell: msys2 {0}
|
||||
|
||||
env:
|
||||
CHERE_INVOKING: 1
|
||||
|
||||
steps:
|
||||
- name: Get CPU name
|
||||
shell: pwsh
|
||||
run : |
|
||||
Get-CIMInstance -Class Win32_Processor | Select-Object -Property Name
|
||||
|
||||
- name: Install build dependencies
|
||||
uses: msys2/setup-msys2@v2
|
||||
with:
|
||||
msystem: ${{ matrix.msystem }}
|
||||
update: true
|
||||
release: false # Use pre-installed version
|
||||
install: >-
|
||||
base-devel
|
||||
${{ matrix.target-prefix }}-cc
|
||||
${{ matrix.target-prefix }}-${{ matrix.fc-pkg }}
|
||||
${{ matrix.target-prefix }}-cmake
|
||||
${{ matrix.target-prefix }}-ninja
|
||||
${{ matrix.target-prefix }}-ccache
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Prepare ccache
|
||||
# Get cache location of ccache
|
||||
# Create key that is used in action/cache/restore and action/cache/save steps
|
||||
id: ccache-prepare
|
||||
run: |
|
||||
echo "ccachedir=$(cygpath -m $(ccache -k cache_dir))" >> $GITHUB_OUTPUT
|
||||
# We include the commit sha in the cache key, as new cache entries are
|
||||
# only created if there is no existing entry for the key yet.
|
||||
echo "key=ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}-${{ github.sha }}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Restore ccache
|
||||
uses: actions/cache/restore@v3
|
||||
with:
|
||||
path: ${{ steps.ccache-prepare.outputs.ccachedir }}
|
||||
key: ${{ steps.ccache-prepare.outputs.key }}
|
||||
# Restore a matching ccache cache entry. Prefer same branch.
|
||||
restore-keys: |
|
||||
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}
|
||||
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}
|
||||
|
||||
- name: Configure ccache
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota.
|
||||
run: |
|
||||
which ccache
|
||||
test -d ${{ steps.ccache-prepare.outputs.ccachedir }} || mkdir -p ${{ steps.ccache-prepare.outputs.ccachedir }}
|
||||
echo "max_size = 250M" > ${{ steps.ccache-prepare.outputs.ccachedir }}/ccache.conf
|
||||
echo "compression = true" >> ${{ steps.ccache-prepare.outputs.ccachedir }}/ccache.conf
|
||||
ccache -p
|
||||
ccache -s
|
||||
echo $HOME
|
||||
cygpath -w $HOME
|
||||
|
||||
- name: Configure OpenBLAS
|
||||
run: |
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DBUILD_STATIC_LIBS=ON \
|
||||
-DDYNAMIC_ARCH=ON \
|
||||
-DUSE_THREAD=ON \
|
||||
-DNUM_THREADS=64 \
|
||||
-DTARGET=CORE2 \
|
||||
${{ matrix.idx64-flags }} \
|
||||
${{ matrix.c-lapack-flags }} \
|
||||
${{ matrix.no-avx512-flags }} \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
|
||||
..
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: cd build && cmake --build .
|
||||
|
||||
- name: Show ccache status
|
||||
continue-on-error: true
|
||||
run: ccache -s
|
||||
|
||||
- name: Save ccache
|
||||
# Save the cache after we are done (successfully) building
|
||||
uses: actions/cache/save@v3
|
||||
with:
|
||||
path: ${{ steps.ccache-prepare.outputs.ccachedir }}
|
||||
key: ${{ steps.ccache-prepare.outputs.key }}
|
||||
|
||||
- name: Run tests
|
||||
id: run-ctest
|
||||
timeout-minutes: 60
|
||||
run: cd build && ctest
|
||||
|
||||
- name: Re-run tests
|
||||
if: always() && (steps.run-ctest.outcome == 'failure')
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
cd build
|
||||
echo "::group::Re-run ctest"
|
||||
ctest --rerun-failed --output-on-failure || true
|
||||
echo "::endgroup::"
|
||||
echo "::group::Log from these tests"
|
||||
[ ! -f Testing/Temporary/LastTest.log ] || cat Testing/Temporary/LastTest.log
|
||||
echo "::endgroup::"
|
||||
|
||||
|
||||
cross_build:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: mips64el
|
||||
triple: mips64el-linux-gnuabi64
|
||||
opts: DYNAMIC_ARCH=1 TARGET=GENERIC
|
||||
- target: riscv64
|
||||
triple: riscv64-linux-gnu
|
||||
opts: TARGET=RISCV64_GENERIC
|
||||
- target: mipsel
|
||||
triple: mipsel-linux-gnu
|
||||
opts: TARGET=MIPS1004K
|
||||
- target: alpha
|
||||
triple: alpha-linux-gnu
|
||||
opts: TARGET=EV4
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ccache gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-${{ matrix.target }}-cross
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: |
|
||||
make -j$(nproc) HOSTCC="ccache gcc" CC="ccache ${{ matrix.triple }}-gcc" FC="ccache ${{ matrix.triple }}-gfortran" ARCH=${{ matrix.target }} ${{ matrix.opts }}
|
||||
119
.github/workflows/loongarch64.yml
vendored
119
.github/workflows/loongarch64.yml
vendored
@@ -1,119 +0,0 @@
|
||||
name: loongarch64 qemu test
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
TEST:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: LOONGSONGENERIC
|
||||
triple: loongarch64-unknown-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSONGENERIC
|
||||
- target: LOONGSON3R5
|
||||
triple: loongarch64-unknown-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
|
||||
- target: LOONGSON2K1000
|
||||
triple: loongarch64-unknown-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
|
||||
- target: DYNAMIC_ARCH
|
||||
triple: loongarch64-unknown-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Install APT deps
|
||||
run: |
|
||||
sudo add-apt-repository ppa:savoury1/virtualisation
|
||||
sudo apt-get update
|
||||
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
|
||||
qemu-user-static
|
||||
|
||||
- name: Download and install loongarch64-toolchain
|
||||
run: |
|
||||
wget https://github.com/sunhaiyong1978/CLFS-for-LoongArch/releases/download/8.1/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz
|
||||
#wget https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz
|
||||
tar -xf CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -C /opt
|
||||
|
||||
- name: Set env
|
||||
run: |
|
||||
echo "LD_LIBRARY_PATH=/opt/cross-tools/target/usr/lib64:/opt/cross-tools/loongarch64-unknown-linux-gnu/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV
|
||||
echo "PATH=$GITHUB_WORKSPACE:/opt/cross-tools/bin:$PATH" >> $GITHUB_ENV
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: Disable utest dsdot:dsdot_n_1
|
||||
run: |
|
||||
echo -n > utest/test_dsdot.c
|
||||
echo "Due to the qemu versions 7.2 causing utest cases to fail,"
|
||||
echo "the utest dsdot:dsdot_n_1 have been temporarily disabled."
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)
|
||||
|
||||
- name: Test
|
||||
run: |
|
||||
qemu-loongarch64-static ./utest/openblas_utest
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
|
||||
122
.github/workflows/mips64.yml
vendored
122
.github/workflows/mips64.yml
vendored
@@ -1,122 +0,0 @@
|
||||
name: mips64 qemu test
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
TEST:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: MIPS64_GENERIC
|
||||
triple: mips64el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=MIPS64_GENERIC
|
||||
- target: SICORTEX
|
||||
triple: mips64el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=SICORTEX
|
||||
- target: I6400
|
||||
triple: mipsisa64r6el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=I6400
|
||||
- target: P6600
|
||||
triple: mipsisa64r6el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=P6600
|
||||
- target: I6500
|
||||
triple: mipsisa64r6el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=I6500
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: install build deps
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
|
||||
gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-mips64el-cross
|
||||
|
||||
- name: checkout qemu
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: qemu/qemu
|
||||
path: qemu
|
||||
ref: 79dfa177ae348bb5ab5f97c0915359b13d6186e2
|
||||
|
||||
- name: build qemu
|
||||
run: |
|
||||
cd qemu
|
||||
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=mips64el-linux-user --disable-system
|
||||
make -j$(nproc)
|
||||
make install
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: build OpenBLAS
|
||||
run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)
|
||||
|
||||
- name: test
|
||||
run: |
|
||||
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
|
||||
qemu-mips64el ./utest/openblas_utest
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat2 < ./ctest/sin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat2 < ./ctest/din2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat2 < ./ctest/cin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat2 < ./ctest/zin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat3 < ./ctest/sin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat3 < ./ctest/din3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat3 < ./ctest/cin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat3 < ./ctest/zin3
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat1
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat
|
||||
90
.github/workflows/nightly-Homebrew-build.yml
vendored
90
.github/workflows/nightly-Homebrew-build.yml
vendored
@@ -1,90 +0,0 @@
|
||||
# Only the "head" branch of the OpenBLAS package is tested
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- '**/nightly-Homebrew-build.yml'
|
||||
pull_request:
|
||||
branches:
|
||||
- develop
|
||||
paths:
|
||||
- '**/nightly-Homebrew-build.yml'
|
||||
schedule:
|
||||
- cron: 45 7 * * *
|
||||
# This is 7:45 AM UTC daily, late at night in the USA
|
||||
|
||||
# Since push and pull_request will still always be building and testing the `develop` branch,
|
||||
# it only makes sense to test if this file has been changed
|
||||
|
||||
name: Nightly-Homebrew-Build
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
build-OpenBLAS-with-Homebrew:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: macos-latest
|
||||
env:
|
||||
DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer
|
||||
HOMEBREW_DEVELOPER: "ON"
|
||||
HOMEBREW_DISPLAY_INSTALL_TIMES: "ON"
|
||||
HOMEBREW_NO_ANALYTICS: "ON"
|
||||
HOMEBREW_NO_AUTO_UPDATE: "ON"
|
||||
HOMEBREW_NO_BOTTLE_SOURCE_FALLBACK: "ON"
|
||||
HOMEBREW_NO_INSTALL_CLEANUP: "ON"
|
||||
HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: "ON"
|
||||
HOMEBREW_NO_INSTALL_FROM_API: "ON"
|
||||
|
||||
steps:
|
||||
- name: Random delay for cron job
|
||||
run: |
|
||||
delay=$(( RANDOM % 600 ))
|
||||
printf 'Delaying for %s seconds on event %s' ${delay} "${{ github.event_name }}"
|
||||
sleep ${delay}
|
||||
if: github.event_name == 'schedule'
|
||||
|
||||
- uses: actions/checkout@v2
|
||||
# This isn't even needed, technically. Homebrew will get `develop` via git
|
||||
|
||||
- name: Update Homebrew
|
||||
if: github.event_name != 'pull_request'
|
||||
run: brew update || true
|
||||
|
||||
- name: Install prerequisites
|
||||
run: brew install --fetch-HEAD --HEAD --only-dependencies --keep-tmp openblas
|
||||
|
||||
- name: Install and bottle OpenBLAS
|
||||
run: brew install --fetch-HEAD --HEAD --build-bottle --keep-tmp openblas
|
||||
# the HEAD flags tell Homebrew to build the develop branch fetch via git
|
||||
|
||||
- name: Create bottle
|
||||
run: |
|
||||
brew bottle -v openblas
|
||||
mkdir bottles
|
||||
mv *.bottle.tar.gz bottles
|
||||
|
||||
- name: Upload bottle
|
||||
uses: actions/upload-artifact@v1
|
||||
with:
|
||||
name: openblas--HEAD.catalina.bottle.tar.gz
|
||||
path: bottles
|
||||
|
||||
- name: Show linkage
|
||||
run: brew linkage -v openblas
|
||||
|
||||
- name: Test openblas
|
||||
run: brew test --HEAD --verbose openblas
|
||||
|
||||
- name: Audit openblas formula
|
||||
run: |
|
||||
brew audit --strict openblas
|
||||
brew cat openblas
|
||||
|
||||
- name: Post logs on failure
|
||||
if: failure()
|
||||
run: brew gist-logs --with-hostname -v openblas
|
||||
253
.github/workflows/riscv64_vector.yml
vendored
253
.github/workflows/riscv64_vector.yml
vendored
@@ -1,253 +0,0 @@
|
||||
name: riscv64 zvl256b qemu test
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
TEST:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
triple: riscv64-unknown-linux-gnu
|
||||
riscv_gnu_toolchain: https://github.com/riscv-collab/riscv-gnu-toolchain
|
||||
riscv_gnu_toolchain_version: 13.2.0
|
||||
riscv_gnu_toolchain_nightly_download_path: /releases/download/2024.02.02/riscv64-glibc-ubuntu-22.04-llvm-nightly-2024.02.02-nightly.tar.gz
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: RISCV64_ZVL128B
|
||||
opts: TARGET=RISCV64_ZVL128B BINARY=64 ARCH=riscv64
|
||||
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=128,elen=64
|
||||
- target: RISCV64_ZVL256B
|
||||
opts: TARGET=RISCV64_ZVL256B BINARY=64 ARCH=riscv64
|
||||
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: install build deps
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install autoconf automake autotools-dev ninja-build make \
|
||||
libgomp1-riscv64-cross ccache
|
||||
wget ${riscv_gnu_toolchain}/${riscv_gnu_toolchain_nightly_download_path}
|
||||
tar -xvf $(basename ${riscv_gnu_toolchain_nightly_download_path}) -C /opt
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: build OpenBLAS libs
|
||||
run: |
|
||||
export PATH="/opt/riscv/bin:$PATH"
|
||||
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
|
||||
CC='ccache clang --rtlib=compiler-rt -target ${triple} --sysroot /opt/riscv/sysroot --gcc-toolchain=/opt/riscv/lib/gcc/riscv64-unknown-linux-gnu/${riscv_gnu_toolchain_version}/' \
|
||||
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
|
||||
RANLIB='ccache ${triple}-ranlib' \
|
||||
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
|
||||
HOSTCC=gcc HOSTFC=gfortran -j$(nproc)
|
||||
|
||||
- name: build OpenBLAS tests
|
||||
run: |
|
||||
export PATH="/opt/riscv/bin:$PATH"
|
||||
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
|
||||
CC='${triple}-gcc' \
|
||||
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
|
||||
RANLIB='ccache ${triple}-ranlib' \
|
||||
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
|
||||
HOSTCC=gcc HOSTFC=gfortran -j$(nproc) tests
|
||||
|
||||
- name: build lapack-netlib tests
|
||||
working-directory: ./lapack-netlib/TESTING
|
||||
run: |
|
||||
export PATH="/opt/riscv/bin:$PATH"
|
||||
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
|
||||
CC='${triple}-gcc' \
|
||||
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
|
||||
RANLIB='ccache ${triple}-ranlib' \
|
||||
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
|
||||
HOSTCC=gcc HOSTFC=gfortran -j$(nproc) \
|
||||
LIN/xlintsts LIN/xlintstc LIN/xlintstd LIN/xlintstz LIN/xlintstrfs \
|
||||
LIN/xlintstrfc LIN/xlintstrfd LIN/xlintstrfz LIN/xlintstds \
|
||||
LIN/xlintstzc EIG/xeigtsts EIG/xeigtstc EIG/xeigtstd EIG/xeigtstz \
|
||||
|
||||
- name: OpenBLAS tests
|
||||
shell: bash
|
||||
run: |
|
||||
export PATH="/opt/riscv/bin:$PATH"
|
||||
export QEMU_CPU=${{ matrix.qemu_cpu }}
|
||||
rm -rf ./test_out
|
||||
mkdir -p ./test_out
|
||||
run_test() { local DIR=$1; local CMD=$2; local DATA=$3; local OUTPUT="./test_out/$DIR.$CMD"; \
|
||||
echo "`pwd`/$DIR/$CMD $DIR/$DATA" >> $OUTPUT; \
|
||||
if [[ -z $DATA ]]; then qemu-riscv64 ./$DIR/$CMD |& tee $OUTPUT ; \
|
||||
else qemu-riscv64 ./$DIR/$CMD < ./$DIR/$DATA |& tee $OUTPUT ; fi ; \
|
||||
RV=$? ; if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi \
|
||||
}
|
||||
run_test test cblat1 &
|
||||
run_test test cblat2 cblat2.dat &
|
||||
run_test test cblat3 cblat3.dat &
|
||||
run_test test dblat1 &
|
||||
run_test test dblat2 dblat2.dat &
|
||||
run_test test dblat3 dblat3.dat &
|
||||
run_test test sblat1 &
|
||||
run_test test sblat2 sblat2.dat &
|
||||
run_test test sblat3 sblat3.dat &
|
||||
run_test test zblat1 &
|
||||
run_test test zblat2 zblat2.dat &
|
||||
run_test test zblat3 zblat3.dat &
|
||||
run_test ctest xccblat1 &
|
||||
run_test ctest xccblat2 cin2 &
|
||||
run_test ctest xccblat3 cin3 &
|
||||
run_test ctest xdcblat1 &
|
||||
run_test ctest xdcblat2 din2 &
|
||||
run_test ctest xdcblat3 din3 &
|
||||
run_test ctest xscblat1 &
|
||||
run_test ctest xscblat2 sin2 &
|
||||
run_test ctest xscblat3 sin3 &
|
||||
run_test ctest xzcblat1 &
|
||||
run_test ctest xzcblat2 zin2 &
|
||||
run_test ctest xzcblat3 zin3 &
|
||||
wait
|
||||
while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*)
|
||||
if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi
|
||||
|
||||
- name: netlib tests
|
||||
shell: bash
|
||||
run: |
|
||||
: # these take a very long time
|
||||
echo "Skipping netlib tests in CI"
|
||||
exit 0
|
||||
: # comment out exit above to enable the tests
|
||||
: # probably we want to identify a subset to run in CI
|
||||
export PATH="/opt/riscv/bin:$PATH"
|
||||
export QEMU_CPU=${{ matrix.qemu_cpu }}
|
||||
rm -rf ./test_out
|
||||
mkdir -p ./test_out
|
||||
run_test() { local OUTPUT="./test_out/$1"; local DATA="./lapack-netlib/TESTING/$2"; local CMD="./lapack-netlib/TESTING/$3"; \
|
||||
echo "$4" >> $OUTPUT; \
|
||||
echo "$CMD" >> $OUTPUT; \
|
||||
qemu-riscv64 $CMD < $DATA |& tee $OUTPUT; \
|
||||
RV=$? ; if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi; \
|
||||
if grep -q fail $OUTPUT ; then echo "*** FAIL: log contains 'fail'" >> $OUTPUT ; fi ; \
|
||||
if grep -q rror $OUTPUT | grep -v -q "passed" | grep -v "largest error" ; then echo "*** FAIL: log contains 'error'" >> $OUTPUT ; fi \
|
||||
}
|
||||
run_test stest.out stest.in LIN/xlintsts "Testing REAL LAPACK linear equation routines" &
|
||||
run_test ctest.out ctest.in LIN/xlintstc "Testing COMPLEX LAPACK linear equation routines" &
|
||||
run_test dtest.out dtest.in LIN/xlintstd "Testing DOUBLE PRECISION LAPACK linear equation routines" &
|
||||
run_test ztest.out ztest.in LIN/xlintstz "Testing COMPLEX16 LAPACK linear equation routines" &
|
||||
run_test dstest.out dstest.in LIN/xlintstds "Testing SINGLE-DOUBLE PRECISION LAPACK prototype linear equation routines" &
|
||||
run_test zctest.out zctest.in LIN/xlintstzc "Testing COMPLEX-COMPLEX16 LAPACK prototype linear equation routines" &
|
||||
run_test stest_rfp.out stest_rfp.in LIN/xlintstrfs "Testing REAL LAPACK RFP prototype linear equation routines" &
|
||||
run_test dtest_rfp.out dtest_rfp.in LIN/xlintstrfd "Testing DOUBLE PRECISION LAPACK RFP prototype linear equation routines" &
|
||||
run_test ctest_rfp.out ctest_rfp.in LIN/xlintstrfc "Testing COMPLEX LAPACK RFP prototype linear equation routines" &
|
||||
run_test ztest_rfp.out ztest_rfp.in LIN/xlintstrfz "Testing COMPLEX16 LAPACK RFP prototype linear equation routines" &
|
||||
run_test snep.out nep.in EIG/xeigtsts "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
|
||||
run_test ssep.out sep.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test sse2.out se2.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test ssvd.out svd.in EIG/xeigtsts "SVD - Testing Singular Value Decomposition routines" &
|
||||
run_test sec.out sec.in EIG/xeigtsts "SEC - Testing REAL Eigen Condition Routines" &
|
||||
run_test sed.out sed.in EIG/xeigtsts "SEV - Testing REAL Nonsymmetric Eigenvalue Driver" &
|
||||
run_test sgg.out sgg.in EIG/xeigtsts "SGG - Testing REAL Nonsymmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test sgd.out sgd.in EIG/xeigtsts "SGD - Testing REAL Nonsymmetric Generalized Eigenvalue Problem driver routines" &
|
||||
run_test ssb.out ssb.in EIG/xeigtsts "SSB - Testing REAL Symmetric Eigenvalue Problem routines" &
|
||||
run_test ssg.out ssg.in EIG/xeigtsts "SSG - Testing REAL Symmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test sbal.out sbal.in EIG/xeigtsts "SGEBAL - Testing the balancing of a REAL general matrix" &
|
||||
run_test sbak.out sbak.in EIG/xeigtsts "SGEBAK - Testing the back transformation of a REAL balanced matrix" &
|
||||
run_test sgbal.out sgbal.in EIG/xeigtsts "SGGBAL - Testing the balancing of a pair of REAL general matrices" &
|
||||
run_test sgbak.out sgbak.in EIG/xeigtsts "SGGBAK - Testing the back transformation of a pair of REAL balanced matrices" &
|
||||
run_test sbb.out sbb.in EIG/xeigtsts "SBB - Testing banded Singular Value Decomposition routines" &
|
||||
run_test sglm.out glm.in EIG/xeigtsts "GLM - Testing Generalized Linear Regression Model routines" &
|
||||
run_test sgqr.out gqr.in EIG/xeigtsts "GQR - Testing Generalized QR and RQ factorization routines" &
|
||||
run_test sgsv.out gsv.in EIG/xeigtsts "GSV - Testing Generalized Singular Value Decomposition routines" &
|
||||
run_test scsd.out csd.in EIG/xeigtsts "CSD - Testing CS Decomposition routines" &
|
||||
run_test slse.out lse.in EIG/xeigtsts "LSE - Testing Constrained Linear Least Squares routines" &
|
||||
run_test cnep.out nep.in EIG/xeigtstc "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
|
||||
run_test csep.out sep.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test cse2.out se2.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test csvd.out svd.in EIG/xeigtstc "SVD - Testing Singular Value Decomposition routines" &
|
||||
run_test cec.out cec.in EIG/xeigtstc "CEC - Testing COMPLEX Eigen Condition Routines" &
|
||||
run_test ced.out ced.in EIG/xeigtstc "CES - Testing COMPLEX Nonsymmetric Schur Form Driver" &
|
||||
run_test cgg.out cgg.in EIG/xeigtstc "CGG - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test cgd.out cgd.in EIG/xeigtstc "CGD - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem driver routines" &
|
||||
run_test csb.out csb.in EIG/xeigtstc "CHB - Testing Hermitian Eigenvalue Problem routines" &
|
||||
run_test csg.out csg.in EIG/xeigtstc "CSG - Testing Symmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test cbal.out cbal.in EIG/xeigtstc "CGEBAL - Testing the balancing of a COMPLEX general matrix" &
|
||||
run_test cbak.out cbak.in EIG/xeigtstc "CGEBAK - Testing the back transformation of a COMPLEX balanced matrix" &
|
||||
run_test cgbal.out cgbal.in EIG/xeigtstc "CGGBAL - Testing the balancing of a pair of COMPLEX general matrices" &
|
||||
run_test cgbak.out cgbak.in EIG/xeigtstc "CGGBAK - Testing the back transformation of a pair of COMPLEX balanced matrices" &
|
||||
run_test cbb.out cbb.in EIG/xeigtstc "CBB - Testing banded Singular Value Decomposition routines" &
|
||||
run_test cglm.out glm.in EIG/xeigtstc "GLM - Testing Generalized Linear Regression Model routines" &
|
||||
run_test cgqr.out gqr.in EIG/xeigtstc "GQR - Testing Generalized QR and RQ factorization routines" &
|
||||
run_test cgsv.out gsv.in EIG/xeigtstc "GSV - Testing Generalized Singular Value Decomposition routines" &
|
||||
run_test ccsd.out csd.in EIG/xeigtstc "CSD - Testing CS Decomposition routines" &
|
||||
run_test clse.out lse.in EIG/xeigtstc "LSE - Testing Constrained Linear Least Squares routines" &
|
||||
run_test dnep.out nep.in EIG/xeigtstd "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
|
||||
run_test dsep.out sep.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test dse2.out se2.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test dsvd.out svd.in EIG/xeigtstd "SVD - Testing Singular Value Decomposition routines" &
|
||||
run_test dec.out dec.in EIG/xeigtstd "DEC - Testing DOUBLE PRECISION Eigen Condition Routines" &
|
||||
run_test ded.out ded.in EIG/xeigtstd "DEV - Testing DOUBLE PRECISION Nonsymmetric Eigenvalue Driver" &
|
||||
run_test dgg.out dgg.in EIG/xeigtstd "DGG - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test dgd.out dgd.in EIG/xeigtstd "DGD - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem driver routines" &
|
||||
run_test dsb.out dsb.in EIG/xeigtstd "DSB - Testing DOUBLE PRECISION Symmetric Eigenvalue Problem routines" &
|
||||
run_test dsg.out dsg.in EIG/xeigtstd "DSG - Testing DOUBLE PRECISION Symmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test dbal.out dbal.in EIG/xeigtstd "DGEBAL - Testing the balancing of a DOUBLE PRECISION general matrix" &
|
||||
run_test dbak.out dbak.in EIG/xeigtstd "DGEBAK - Testing the back transformation of a DOUBLE PRECISION balanced matrix" &
|
||||
run_test dgbal.out dgbal.in EIG/xeigtstd "DGGBAL - Testing the balancing of a pair of DOUBLE PRECISION general matrices" &
|
||||
run_test dgbak.out dgbak.in EIG/xeigtstd "DGGBAK - Testing the back transformation of a pair of DOUBLE PRECISION balanced matrices" &
|
||||
run_test dbb.out dbb.in EIG/xeigtstd "DBB - Testing banded Singular Value Decomposition routines" &
|
||||
run_test dglm.out glm.in EIG/xeigtstd "GLM - Testing Generalized Linear Regression Model routines" &
|
||||
run_test dgqr.out gqr.in EIG/xeigtstd "GQR - Testing Generalized QR and RQ factorization routines" &
|
||||
run_test dgsv.out gsv.in EIG/xeigtstd "GSV - Testing Generalized Singular Value Decomposition routines" &
|
||||
run_test dcsd.out csd.in EIG/xeigtstd "CSD - Testing CS Decomposition routines" &
|
||||
run_test dlse.out lse.in EIG/xeigtstd "LSE - Testing Constrained Linear Least Squares routines" &
|
||||
run_test znep.out nep.in EIG/xeigtstz "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
|
||||
run_test zsep.out sep.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test zse2.out se2.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test zsvd.out svd.in EIG/xeigtstz "SVD - Testing Singular Value Decomposition routines" &
|
||||
run_test zec.out zec.in EIG/xeigtstz "ZEC - Testing COMPLEX16 Eigen Condition Routines" &
|
||||
run_test zed.out zed.in EIG/xeigtstz "ZES - Testing COMPLEX16 Nonsymmetric Schur Form Driver" &
|
||||
run_test zgg.out zgg.in EIG/xeigtstz "ZGG - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test zgd.out zgd.in EIG/xeigtstz "ZGD - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem driver routines" &
|
||||
run_test zsb.out zsb.in EIG/xeigtstz "ZHB - Testing Hermitian Eigenvalue Problem routines" &
|
||||
run_test zsg.out zsg.in EIG/xeigtstz "ZSG - Testing Symmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test zbal.out zbal.in EIG/xeigtstz "ZGEBAL - Testing the balancing of a COMPLEX16 general matrix" &
|
||||
run_test zbak.out zbak.in EIG/xeigtstz "ZGEBAK - Testing the back transformation of a COMPLEX16 balanced matrix" &
|
||||
run_test zgbal.out zgbal.in EIG/xeigtstz "ZGGBAL - Testing the balancing of a pair of COMPLEX general matrices" &
|
||||
run_test zgbak.out zgbak.in EIG/xeigtstz "ZGGBAK - Testing the back transformation of a pair of COMPLEX16 balanced matrices" &
|
||||
run_test zbb.out zbb.in EIG/xeigtstz "ZBB - Testing banded Singular Value Decomposition routines" &
|
||||
run_test zglm.out glm.in EIG/xeigtstz "GLM - Testing Generalized Linear Regression Model routines" &
|
||||
run_test zgqr.out gqr.in EIG/xeigtstz "GQR - Testing Generalized QR and RQ factorization routines" &
|
||||
run_test zgsv.out gsv.in EIG/xeigtstz "GSV - Testing Generalized Singular Value Decomposition routines" &
|
||||
run_test zcsd.out csd.in EIG/xeigtstz "CSD - Testing CS Decomposition routines" &
|
||||
run_test zlse.out lse.in EIG/xeigtstz "LSE - Testing Constrained Linear Least Squares routines" &
|
||||
wait
|
||||
while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*)
|
||||
python ./lapack-netlib/lapack_testing.py -d ./test_out -e > netlib_summary
|
||||
TOTALS="$(grep 'ALL PRECISIONS' netlib_summary)"
|
||||
NUMERICAL_ERRORS=-1
|
||||
OTHER_ERRORS=-1
|
||||
. <(awk '/ALL PRECISIONS/{printf "NUMERICAL_ERRORS=%s\nOTHER_ERRORS=%s\n", $5, $7}' netlib_summary
|
||||
if (( NUMERICAL_ERRORS != 0 )) || (( OTHER_ERRORS != 0 )) ; then cat netlib_summary ; FAILURES=1 ; fi
|
||||
if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi
|
||||
44
.gitignore
vendored
44
.gitignore
vendored
@@ -5,7 +5,6 @@
|
||||
*.def
|
||||
*.o
|
||||
*.out
|
||||
*.tmp
|
||||
lapack-3.1.1
|
||||
lapack-3.1.1.tgz
|
||||
lapack-3.4.1
|
||||
@@ -14,25 +13,8 @@ lapack-3.4.2
|
||||
lapack-3.4.2.tgz
|
||||
lapack-netlib/make.inc
|
||||
lapack-netlib/lapacke/include/lapacke_mangling.h
|
||||
lapack-netlib/SRC/la_constants.mod
|
||||
lapack-netlib/TESTING/testing_results.txt
|
||||
lapack-netlib/INSTALL/test*
|
||||
lapack-netlib/TESTING/xeigtstc
|
||||
lapack-netlib/TESTING/xeigtstd
|
||||
lapack-netlib/TESTING/xeigtsts
|
||||
lapack-netlib/TESTING/xeigtstz
|
||||
lapack-netlib/TESTING/xlintstc
|
||||
lapack-netlib/TESTING/xlintstd
|
||||
lapack-netlib/TESTING/xlintstds
|
||||
lapack-netlib/TESTING/xlintstrfc
|
||||
lapack-netlib/TESTING/xlintstrfd
|
||||
lapack-netlib/TESTING/xlintstrfs
|
||||
lapack-netlib/TESTING/xlintstrfz
|
||||
lapack-netlib/TESTING/xlintsts
|
||||
lapack-netlib/TESTING/xlintstz
|
||||
lapack-netlib/TESTING/xlintstzc
|
||||
*.so
|
||||
*.so.*
|
||||
*.a
|
||||
.svn
|
||||
*~
|
||||
@@ -47,65 +29,39 @@ config_last.h
|
||||
getarch
|
||||
getarch_2nd
|
||||
utest/openblas_utest
|
||||
utest/openblas_utest_ext
|
||||
ctest/xccblat1
|
||||
ctest/xccblat2
|
||||
ctest/xccblat3
|
||||
ctest/xccblat3_3m
|
||||
ctest/xdcblat1
|
||||
ctest/xdcblat2
|
||||
ctest/xdcblat3
|
||||
ctest/xdcblat3_3m
|
||||
ctest/xscblat1
|
||||
ctest/xscblat2
|
||||
ctest/xscblat3
|
||||
ctest/xscblat3_3m
|
||||
ctest/xzcblat1
|
||||
ctest/xzcblat2
|
||||
ctest/xzcblat3
|
||||
ctest/xzcblat3_3m
|
||||
exports/linktest.c
|
||||
exports/linux.def
|
||||
kernel/setparam_*.c
|
||||
kernel/kernel_*.h
|
||||
test/CBLAT2.SUMM
|
||||
test/CBLAT3.SUMM
|
||||
test/CBLAT3_3M.SUMM
|
||||
test/DBLAT2.SUMM
|
||||
test/DBLAT3.SUMM
|
||||
test/DBLAT3_3M.SUMM
|
||||
test/SBLAT2.SUMM
|
||||
test/SBLAT3.SUMM
|
||||
test/SBLAT3_3M.SUMM
|
||||
test/ZBLAT2.SUMM
|
||||
test/ZBLAT3.SUMM
|
||||
test/ZBLAT3_3M.SUMM
|
||||
test/SHBLAT3.SUMM
|
||||
test/SBBLAT3.SUMM
|
||||
test/cblat1
|
||||
test/cblat2
|
||||
test/cblat3
|
||||
test/cblat3_3m
|
||||
test/dblat1
|
||||
test/dblat2
|
||||
test/dblat3
|
||||
test/dblat3_3m
|
||||
test/sblat1
|
||||
test/sblat2
|
||||
test/sblat3
|
||||
test/sblat3_3m
|
||||
test/test_shgemm
|
||||
test/test_sbgemm
|
||||
test/zblat1
|
||||
test/zblat2
|
||||
test/zblat3
|
||||
test/zblat3_3m
|
||||
build
|
||||
build.*
|
||||
*.swp
|
||||
benchmark/*.goto
|
||||
benchmark/smallscaling
|
||||
.vscode
|
||||
CMakeCache.txt
|
||||
CMakeFiles/*
|
||||
.vscode
|
||||
|
||||
326
.travis.yml
326
.travis.yml
@@ -1,320 +1,24 @@
|
||||
# XXX: Precise is already deprecated, new default is Trusty.
|
||||
# https://blog.travis-ci.com/2017-07-11-trusty-as-default-linux-is-coming
|
||||
dist: focal
|
||||
sudo: true
|
||||
language: c
|
||||
compiler:
|
||||
- gcc
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- &test-ubuntu
|
||||
# os: linux
|
||||
compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gfortran
|
||||
# before_script: &common-before
|
||||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
|
||||
# script:
|
||||
# - make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
# - make -C test $COMMON_FLAGS $BTYPE
|
||||
# - make -C ctest $COMMON_FLAGS $BTYPE
|
||||
# - make -C utest $COMMON_FLAGS $BTYPE
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64
|
||||
# - BTYPE="BINARY=64"
|
||||
#
|
||||
# - <<: *test-ubuntu
|
||||
os: linux
|
||||
arch: ppc64le
|
||||
before_script: &common-before
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
|
||||
script:
|
||||
- travis_wait 50 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=PPC64LE_LINUX
|
||||
- BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
env:
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64"
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 INTERFACE64=1"
|
||||
- TARGET_BOX=LINUX32 BTYPE="BINARY=32"
|
||||
- TARGET_BOX=WIN64 BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
os: linux
|
||||
arch: s390x
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
|
||||
- sudo apt-get install --only-upgrade binutils
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=IBMZ_LINUX
|
||||
- BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq gfortran
|
||||
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
|
||||
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
|
||||
|
||||
- <<: *test-ubuntu
|
||||
os: linux
|
||||
dist: focal
|
||||
arch: s390x
|
||||
compiler: clang
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
|
||||
- sudo apt-get install --only-upgrade binutils
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=IBMZ_LINUX
|
||||
- BTYPE="BINARY=64 USE_OPENMP=0 CC=clang"
|
||||
script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
|
||||
|
||||
# - <<: *test-ubuntu
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64
|
||||
# - BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
#
|
||||
# - <<: *test-ubuntu
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64
|
||||
# - BTYPE="BINARY=64 INTERFACE64=1"
|
||||
#
|
||||
# - <<: *test-ubuntu
|
||||
# compiler: clang
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64
|
||||
# - BTYPE="BINARY=64 CC=clang"
|
||||
#
|
||||
# - <<: *test-ubuntu
|
||||
# compiler: clang
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64
|
||||
# - BTYPE="BINARY=64 INTERFACE64=1 CC=clang"
|
||||
#
|
||||
# - <<: *test-ubuntu
|
||||
# addons:
|
||||
# apt:
|
||||
# packages:
|
||||
# - gcc-multilib
|
||||
# - gfortran-multilib
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX32
|
||||
# - BTYPE="BINARY=32"
|
||||
#
|
||||
- os: linux
|
||||
arch: ppc64le
|
||||
dist: bionic
|
||||
compiler: gcc
|
||||
before_script:
|
||||
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
|
||||
- sudo apt-get update
|
||||
- sudo apt-get install gcc-9 gfortran-9 -y
|
||||
script:
|
||||
- travis_wait 50 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=PPC64LE_LINUX_P9
|
||||
|
||||
- os: linux
|
||||
arch: ppc64le
|
||||
dist: bionic
|
||||
compiler: gcc
|
||||
before_script:
|
||||
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
|
||||
- sudo apt-get update
|
||||
- sudo apt-get install gcc-9 gfortran-9 -y
|
||||
script:
|
||||
- travis_wait 50 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=PPC64LE_LINUX_P9
|
||||
|
||||
# - os: linux
|
||||
# compiler: gcc
|
||||
# addons:
|
||||
# apt:
|
||||
# packages:
|
||||
# - binutils-mingw-w64-x86-64
|
||||
# - gcc-mingw-w64-x86-64
|
||||
# - gfortran-mingw-w64-x86-64
|
||||
# before_script: *common-before
|
||||
# script:
|
||||
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
# env:
|
||||
# - TARGET_BOX=WIN64
|
||||
# - BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
|
||||
#
|
||||
# Build & test on Alpine Linux inside chroot, i.e. on system with musl libc.
|
||||
# These jobs needs sudo, so Travis runs them on VM-based infrastructure
|
||||
# which is slower than container-based infrastructure used for jobs
|
||||
# that don't require sudo.
|
||||
# - &test-alpine
|
||||
# os: linux
|
||||
# dist: trusty
|
||||
# sudo: true
|
||||
# language: minimal
|
||||
# before_install:
|
||||
# - "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.9.0/alpine-chroot-install' \
|
||||
# && echo 'e5dfbbdc0c4b3363b99334510976c86bfa6cb251 alpine-chroot-install' | sha1sum -c || exit 1"
|
||||
# - alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
|
||||
# install:
|
||||
# - sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
|
||||
# before_script: *common-before
|
||||
# script:
|
||||
# # XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
|
||||
# - alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
# CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
|
||||
# - alpine make -C test $COMMON_FLAGS $BTYPE
|
||||
# - alpine make -C ctest $COMMON_FLAGS $BTYPE
|
||||
# - alpine make -C utest $COMMON_FLAGS $BTYPE
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64_MUSL
|
||||
# - BTYPE="BINARY=64"
|
||||
|
||||
# XXX: This job segfaults in TESTS OF THE COMPLEX LEVEL 3 BLAS,
|
||||
# but only on Travis CI, cannot reproduce it elsewhere.
|
||||
#- &test-alpine-openmp
|
||||
# <<: *test-alpine
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64_MUSL
|
||||
# - BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
|
||||
# - <<: *test-alpine
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64_MUSL
|
||||
# - BTYPE="BINARY=64 INTERFACE64=1"
|
||||
#
|
||||
# # Build with the same flags as Alpine do in OpenBLAS package.
|
||||
# - <<: *test-alpine
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64_MUSL
|
||||
# - BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=CORE2"
|
||||
|
||||
# - &test-cmake
|
||||
# os: linux
|
||||
# compiler: clang
|
||||
# addons:
|
||||
# apt:
|
||||
# packages:
|
||||
# - gfortran
|
||||
# - cmake
|
||||
# dist: trusty
|
||||
# sudo: true
|
||||
# before_script:
|
||||
# - COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32"
|
||||
# script:
|
||||
# - mkdir build
|
||||
# - CONFIG=Release
|
||||
# - cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG
|
||||
# - cmake --build build --config $CONFIG -- -j2
|
||||
# env:
|
||||
# - CMAKE=1
|
||||
# - <<: *test-cmake
|
||||
# env:
|
||||
# - CMAKE=1 CMAKE_ARGS="-DNOFORTRAN=1"
|
||||
# - <<: *test-cmake
|
||||
# compiler: gcc
|
||||
# env:
|
||||
# - CMAKE=1
|
||||
|
||||
# - &test-macos
|
||||
# os: osx
|
||||
# osx_image: xcode11.5
|
||||
# before_script:
|
||||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||
# script:
|
||||
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
# env:
|
||||
# - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-9"
|
||||
#
|
||||
# - <<: *test-macos
|
||||
# osx_image: xcode12
|
||||
# before_script:
|
||||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||
# - brew update
|
||||
# script:
|
||||
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
# env:
|
||||
# - BTYPE="TARGET=HASWELL USE_OPENMP=1 BINARY=64 INTERFACE64=1 CC=gcc-10 FC=gfortran-10"
|
||||
#
|
||||
# - <<: *test-macos
|
||||
# osx_image: xcode12
|
||||
# before_script:
|
||||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||
# - brew update
|
||||
# script:
|
||||
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
# env:
|
||||
# - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"
|
||||
|
||||
# - <<: *test-macos
|
||||
# osx_image: xcode10
|
||||
# env:
|
||||
# - BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1"
|
||||
|
||||
# - <<: *test-macos
|
||||
# osx_image: xcode11.5
|
||||
# before_script:
|
||||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||
# - brew update
|
||||
# env:
|
||||
# - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||
# - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
|
||||
# - CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||
# - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch arm64 -miphoneos-version-min=10.0"
|
||||
# - BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
|
||||
# - <<: *test-macos
|
||||
# osx_image: xcode11.5
|
||||
# env:
|
||||
## - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||
## - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
|
||||
# - CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||
# - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch armv7 -miphoneos-version-min=5.1"
|
||||
# - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
|
||||
|
||||
- &test-neoversen1
|
||||
os: linux
|
||||
arch: arm64
|
||||
dist: focal
|
||||
group: edge
|
||||
virt: lxd
|
||||
compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gfortran
|
||||
script:
|
||||
- travis_wait 45 make && make lapack-test
|
||||
env:
|
||||
- TARGET_BOX=NEOVERSE_N1
|
||||
|
||||
- &test-neon1-gcc8
|
||||
os: linux
|
||||
arch: arm64
|
||||
dist: focal
|
||||
group: edge
|
||||
virt: lxd
|
||||
compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gcc-8
|
||||
- gfortran-8
|
||||
script:
|
||||
- travis_wait 45 make QUIET_MAKE=1 CC=gcc-8 FC=gfortran-8 DYNAMIC_ARCH=1
|
||||
env:
|
||||
- TARGET_BOX=NEOVERSE_N1-GCC8
|
||||
|
||||
# whitelist
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- develop
|
||||
|
||||
notifications:
|
||||
webhooks:
|
||||
urls:
|
||||
- https://webhooks.gitter.im/e/8a6e4470a0cebd090344
|
||||
on_success: change # options: [always|never|change] default: always
|
||||
on_failure: always # options: [always|never|change] default: always
|
||||
on_start: never # options: [always|never|change] default: always
|
||||
- develop
|
||||
@@ -1,9 +1,5 @@
|
||||
Thank you for the support.
|
||||
|
||||
### [2019.12/2021.9] [Chan-Zuckerberg Foundation EOSS Initiative](https://chanzuckerberg.com/eoss/)
|
||||
|
||||
Between December 2019 and September 2021, development and maintaining of OpenBLAS was funded in part by the Chan-Zuckerberg Foundation in the context of two grants awarded to the NumPy Foundation and managed by NumFocus (Cycles 1 and 3 of the Essential Open Source Software for Science (EOSS) Initiative of the Chan-Zuckerberg Foundation)
|
||||
|
||||
### [2013.8] [Testbed for OpenBLAS project](https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project)
|
||||
|
||||
https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project/pledges
|
||||
|
||||
631
CMakeLists.txt
631
CMakeLists.txt
@@ -1,631 +0,0 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
##
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.5)
|
||||
|
||||
project(OpenBLAS C ASM)
|
||||
|
||||
set(OpenBLAS_MAJOR_VERSION 0)
|
||||
set(OpenBLAS_MINOR_VERSION 3)
|
||||
set(OpenBLAS_PATCH_VERSION 27)
|
||||
|
||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||
|
||||
# Adhere to GNU filesystem layout conventions
|
||||
include(GNUInstallDirs)
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
#######
|
||||
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF)
|
||||
|
||||
option(BUILD_LAPACK_DEPRECATED "When building LAPACK, include also some older, deprecated routines" ON)
|
||||
|
||||
option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON)
|
||||
|
||||
option(BUILD_BENCHMARKS "Build the collection of BLAS/LAPACK benchmarks" OFF)
|
||||
|
||||
option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF)
|
||||
|
||||
option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS functions" OFF)
|
||||
|
||||
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64, aarch64 or ppc only)" OFF)
|
||||
|
||||
option(DYNAMIC_OLDER "Include specific support for older x86 cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF)
|
||||
|
||||
option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)" OFF)
|
||||
|
||||
option(USE_LOCKING "Use locks even in single-threaded builds to make them callable from multiple threads" OFF)
|
||||
|
||||
option(USE_PERL "Use the older PERL scripts for build preparation instead of universal shell scripts" OFF)
|
||||
|
||||
option(NO_WARMUP "Do not run a benchmark on each startup just to find the best location for the memory buffer" ON)
|
||||
|
||||
option(FIXED_LIBNAME "Use a non-versioned name for the library and no symbolic linking to variant names" OFF)
|
||||
|
||||
set(LIBNAMEPREFIX "" CACHE STRING "Add a prefix to the openblas part of the library name" )
|
||||
set(LIBNAMESUFFIX "" CACHE STRING "Add a suffix after the openblas part of the library name" )
|
||||
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
|
||||
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON)
|
||||
else()
|
||||
set(NO_AFFINITY 1)
|
||||
endif()
|
||||
|
||||
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF)
|
||||
|
||||
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)
|
||||
option(BUILD_STATIC_LIBS "Build static library" OFF)
|
||||
if(NOT BUILD_STATIC_LIBS AND NOT BUILD_SHARED_LIBS)
|
||||
set(BUILD_STATIC_LIBS ON CACHE BOOL "Build static library" FORCE)
|
||||
endif()
|
||||
if((BUILD_STATIC_LIBS AND BUILD_SHARED_LIBS) AND MSVC)
|
||||
message(WARNING "Could not enable both BUILD_STATIC_LIBS and BUILD_SHARED_LIBS with MSVC, Disable BUILD_SHARED_LIBS")
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build static library" FORCE)
|
||||
endif()
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoids conflicts with other BLAS libraries, especially when using
|
||||
# 64 bit integer interfaces in OpenBLAS.
|
||||
set(SYMBOLPREFIX "" CACHE STRING "Add a prefix to all exported symbol names in the shared library to avoid conflicts with other BLAS libraries" )
|
||||
|
||||
set(SYMBOLSUFFIX "" CACHE STRING "Add a suffix to all exported symbol names in the shared library, e.g. _64 for INTERFACE64 builds" )
|
||||
|
||||
#######
|
||||
if(BUILD_WITHOUT_LAPACK)
|
||||
set(NO_LAPACK 1)
|
||||
set(NO_LAPACKE 1)
|
||||
endif()
|
||||
|
||||
if(BUILD_WITHOUT_CBLAS)
|
||||
set(NO_CBLAS 1)
|
||||
endif()
|
||||
|
||||
#######
|
||||
|
||||
if(MSVC AND MSVC_STATIC_CRT)
|
||||
set(CompilerFlags
|
||||
CMAKE_CXX_FLAGS
|
||||
CMAKE_CXX_FLAGS_DEBUG
|
||||
CMAKE_CXX_FLAGS_RELEASE
|
||||
CMAKE_C_FLAGS
|
||||
CMAKE_C_FLAGS_DEBUG
|
||||
CMAKE_C_FLAGS_RELEASE
|
||||
)
|
||||
foreach(CompilerFlag ${CompilerFlags})
|
||||
string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
message(WARNING "CMake support is experimental. It does not yet support all build options and may not produce the same Makefiles that OpenBLAS ships with.")
|
||||
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")
|
||||
|
||||
set(OpenBLAS_LIBNAME ${LIBNAMEPREFIX}openblas${LIBNAMESUFFIX}${SUFFIX64_UNDERSCORE})
|
||||
|
||||
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
|
||||
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
list(APPEND BLASDIRS kernel)
|
||||
endif ()
|
||||
|
||||
if (DEFINED SANITY_CHECK)
|
||||
list(APPEND BLASDIRS reference)
|
||||
endif ()
|
||||
|
||||
set(SUBDIRS ${BLASDIRS})
|
||||
if (NOT NO_LAPACK)
|
||||
if(BUILD_RELAPACK)
|
||||
list(APPEND SUBDIRS relapack/src)
|
||||
endif()
|
||||
list(APPEND SUBDIRS lapack)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED BUILD_BFLOAT16)
|
||||
set (BUILD_BFLOAT16 false)
|
||||
endif ()
|
||||
# set which float types we want to build for
|
||||
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
|
||||
# if none are defined, build for all
|
||||
# set(BUILD_BFLOAT16 true)
|
||||
set(BUILD_SINGLE true)
|
||||
set(BUILD_DOUBLE true)
|
||||
set(BUILD_COMPLEX true)
|
||||
set(BUILD_COMPLEX16 true)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED BUILD_MATGEN)
|
||||
set(BUILD_MATGEN true)
|
||||
endif()
|
||||
|
||||
set(FLOAT_TYPES "")
|
||||
if (BUILD_SINGLE)
|
||||
message(STATUS "Building Single Precision")
|
||||
list(APPEND FLOAT_TYPES "SINGLE") # defines nothing
|
||||
endif ()
|
||||
|
||||
if (BUILD_DOUBLE)
|
||||
message(STATUS "Building Double Precision")
|
||||
list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE
|
||||
endif ()
|
||||
|
||||
if (BUILD_COMPLEX)
|
||||
message(STATUS "Building Complex Precision")
|
||||
list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX
|
||||
endif ()
|
||||
|
||||
if (BUILD_COMPLEX16)
|
||||
message(STATUS "Building Double Complex Precision")
|
||||
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
|
||||
endif ()
|
||||
|
||||
if (BUILD_BFLOAT16)
|
||||
message(STATUS "Building Half Precision")
|
||||
# list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
|
||||
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
|
||||
endif ()
|
||||
|
||||
#Set default output directory
|
||||
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
if(MSVC)
|
||||
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib/Debug)
|
||||
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib/Release)
|
||||
endif ()
|
||||
|
||||
# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
|
||||
set(TARGET_OBJS "")
|
||||
foreach (SUBDIR ${SUBDIRS})
|
||||
add_subdirectory(${SUBDIR})
|
||||
string(REPLACE "/" "_" subdir_obj ${SUBDIR})
|
||||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${subdir_obj}>")
|
||||
endforeach ()
|
||||
|
||||
# netlib:
|
||||
|
||||
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
|
||||
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
|
||||
if (NOT NO_LAPACK)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
|
||||
if (NOT NO_LAPACKE)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Only generate .def for dll on MSVC and always produce pdb files for debug and release
|
||||
if(MSVC)
|
||||
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
|
||||
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
|
||||
endif()
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
|
||||
endif()
|
||||
|
||||
if (${DYNAMIC_ARCH})
|
||||
add_subdirectory(kernel)
|
||||
foreach(TARGET_CORE ${DYNAMIC_CORE})
|
||||
message("${TARGET_CORE}")
|
||||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:kernel_${TARGET_CORE}>")
|
||||
endforeach()
|
||||
endif ()
|
||||
|
||||
# add objects to the openblas lib
|
||||
if(NOT NO_LAPACK)
|
||||
add_library(LAPACK_OVERRIDES OBJECT ${LA_SOURCES})
|
||||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACK_OVERRIDES>")
|
||||
endif()
|
||||
if(NOT NO_LAPACKE)
|
||||
add_library(LAPACKE OBJECT ${LAPACKE_SOURCES})
|
||||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACKE>")
|
||||
endif()
|
||||
#if(BUILD_RELAPACK)
|
||||
# add_library(RELAPACK OBJECT ${RELA_SOURCES})
|
||||
# list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:RELAPACK>")
|
||||
#endif()
|
||||
set(OpenBLAS_LIBS "")
|
||||
if(BUILD_STATIC_LIBS)
|
||||
add_library(${OpenBLAS_LIBNAME}_static STATIC ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||
target_include_directories(${OpenBLAS_LIBNAME}_static INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
|
||||
list(APPEND OpenBLAS_LIBS ${OpenBLAS_LIBNAME}_static)
|
||||
endif()
|
||||
if(BUILD_SHARED_LIBS)
|
||||
add_library(${OpenBLAS_LIBNAME}_shared SHARED ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||
target_include_directories(${OpenBLAS_LIBNAME}_shared INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
|
||||
list(APPEND OpenBLAS_LIBS ${OpenBLAS_LIBNAME}_shared)
|
||||
endif()
|
||||
if(BUILD_STATIC_LIBS)
|
||||
add_library(${OpenBLAS_LIBNAME} ALIAS ${OpenBLAS_LIBNAME}_static)
|
||||
else()
|
||||
add_library(${OpenBLAS_LIBNAME} ALIAS ${OpenBLAS_LIBNAME}_shared)
|
||||
endif()
|
||||
|
||||
set_target_properties(${OpenBLAS_LIBS} PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
|
||||
|
||||
# Android needs to explicitly link against libm
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "AIX|Android|Linux|FreeBSD|OpenBSD|NetBSD|DragonFly|Darwin")
|
||||
if(BUILD_STATIC_LIBS)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_static m)
|
||||
endif()
|
||||
if(BUILD_SHARED_LIBS)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_shared m)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Seems that this hack doesn't required since macOS 11 Big Sur
|
||||
if (APPLE AND BUILD_SHARED_LIBS AND CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
|
||||
set (CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1)
|
||||
if (NOT NOFORTRAN)
|
||||
set (CMAKE_Fortran_USE_RESPONSE_FILE_FOR_OBJECTS 1)
|
||||
set (CMAKE_Fortran_CREATE_SHARED_LIBRARY
|
||||
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru libopenblas.a && exit 0' "
|
||||
"sh -c '${CMAKE_AR} -rs libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
|
||||
"sh -c 'echo \"\" | ${CMAKE_Fortran_COMPILER} -o dummy.o -c -x f95-cpp-input - '"
|
||||
"sh -c '${CMAKE_Fortran_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load dummy.o -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'"
|
||||
"sh -c 'ls -l ${CMAKE_BINARY_DIR}/lib'")
|
||||
else ()
|
||||
set (CMAKE_C_CREATE_SHARED_LIBRARY
|
||||
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru libopenblas.a && exit 0' "
|
||||
"sh -c '${CMAKE_AR} -rs libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
|
||||
"sh -c '${CMAKE_C_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'")
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
# Handle MSVC exports
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
|
||||
else()
|
||||
# Creates verbose .def file (51KB vs 18KB)
|
||||
set_target_properties(${OpenBLAS_LIBNAME}_shared PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Set output for libopenblas
|
||||
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
|
||||
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES EXPORT_NAME "OpenBLAS")
|
||||
|
||||
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
|
||||
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
|
||||
|
||||
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
endforeach()
|
||||
|
||||
enable_testing()
|
||||
|
||||
if (USE_THREAD)
|
||||
# Add threading library to linker
|
||||
find_package(Threads)
|
||||
if (THREADS_HAVE_PTHREAD_ARG)
|
||||
set_target_properties(${OpenBLAS_LIBS} PROPERTIES
|
||||
COMPILE_OPTIONS "-pthread"
|
||||
INTERFACE_COMPILE_OPTIONS "-pthread"
|
||||
)
|
||||
endif()
|
||||
if(BUILD_STATIC_LIBS)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_static ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif()
|
||||
if(BUILD_SHARED_LIBS)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_shared ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
#if (MSVC OR NOT NOFORTRAN)
|
||||
if (NOT NO_CBLAS)
|
||||
if (NOT ONLY_CBLAS)
|
||||
# Broken without fortran on unix
|
||||
add_subdirectory(utest)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (NOT NOFORTRAN)
|
||||
if (NOT ONLY_CBLAS)
|
||||
# Build test and ctest
|
||||
add_subdirectory(test)
|
||||
endif()
|
||||
if (BUILD_TESTING AND NOT BUILD_WITHOUT_LAPACK)
|
||||
add_subdirectory(lapack-netlib/TESTING)
|
||||
endif()
|
||||
endif()
|
||||
if(NOT NO_CBLAS)
|
||||
if (NOT ONLY_CBLAS)
|
||||
add_subdirectory(ctest)
|
||||
endif()
|
||||
endif()
|
||||
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
|
||||
add_subdirectory(cpp_thread_test)
|
||||
endif()
|
||||
|
||||
if (NOT FIXED_LIBNAME)
|
||||
set_target_properties(${OpenBLAS_LIBS} PROPERTIES
|
||||
VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
|
||||
SOVERSION ${OpenBLAS_MAJOR_VERSION}
|
||||
)
|
||||
endif()
|
||||
if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
|
||||
if (NOT MSVC)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_shared "-Wl,-allow-multiple-definition")
|
||||
else()
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /FORCE:MULTIPLE")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
|
||||
if (NOT DEFINED ARCH)
|
||||
set(ARCH_IN "x86_64")
|
||||
else()
|
||||
set(ARCH_IN ${ARCH})
|
||||
endif()
|
||||
|
||||
if (${CORE} STREQUAL "generic")
|
||||
set(ARCH_IN "GENERIC")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED EXPRECISION)
|
||||
set(EXPRECISION_IN 0)
|
||||
else()
|
||||
set(EXPRECISION_IN ${EXPRECISION})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NO_CBLAS)
|
||||
set(NO_CBLAS_IN 0)
|
||||
else()
|
||||
set(NO_CBLAS_IN ${NO_CBLAS})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NO_LAPACK)
|
||||
set(NO_LAPACK_IN 0)
|
||||
else()
|
||||
set(NO_LAPACK_IN ${NO_LAPACK})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NO_LAPACKE)
|
||||
set(NO_LAPACKE_IN 0)
|
||||
else()
|
||||
set(NO_LAPACKE_IN ${NO_LAPACKE})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NEED2UNDERSCORES)
|
||||
set(NEED2UNDERSCORES_IN 0)
|
||||
else()
|
||||
set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED ONLY_CBLAS)
|
||||
set(ONLY_CBLAS_IN 0)
|
||||
else()
|
||||
set(ONLY_CBLAS_IN ${ONLY_CBLAS})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED BU)
|
||||
set(BU _)
|
||||
endif()
|
||||
|
||||
if (NOT ${SYMBOLPREFIX} STREQUAL "")
|
||||
message(STATUS "adding prefix ${SYMBOLPREFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
|
||||
endif()
|
||||
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
|
||||
message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
|
||||
endif()
|
||||
|
||||
if (${BUILD_LAPACK_DEPRECATED})
|
||||
set (BLD 1)
|
||||
else ()
|
||||
set (BLD 0)
|
||||
endif()
|
||||
if (${BUILD_BFLOAT16})
|
||||
set (BBF16 1)
|
||||
else ()
|
||||
set (BBF16 0)
|
||||
endif()
|
||||
if (${BUILD_SINGLE})
|
||||
set (BS 1)
|
||||
else ()
|
||||
set (BS 0)
|
||||
endif()
|
||||
if (${BUILD_DOUBLE})
|
||||
set (BD 1)
|
||||
else ()
|
||||
set (BD 0)
|
||||
endif()
|
||||
if (${BUILD_COMPLEX})
|
||||
set (BC 1)
|
||||
else ()
|
||||
set (BC 0)
|
||||
endif()
|
||||
if (${BUILD_COMPLEX16})
|
||||
set (BZ 1)
|
||||
else ()
|
||||
set (BZ 0)
|
||||
endif()
|
||||
if (NOT USE_PERL)
|
||||
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
|
||||
COMMAND ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
|
||||
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
|
||||
COMMENT "renaming symbols"
|
||||
)
|
||||
else()
|
||||
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
|
||||
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
|
||||
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
|
||||
COMMENT "renaming symbols"
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (BUILD_BENCHMARKS)
|
||||
#find_package(OpenMP REQUIRED)
|
||||
file(GLOB SOURCES "benchmark/*.c")
|
||||
if (NOT USE_OPENMP)
|
||||
file(GLOB REMFILE "benchmark/smallscaling.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
endif()
|
||||
if (BUILD_WITHOUT_LAPACK)
|
||||
file(GLOB REMFILE "benchmark/cholesky.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/geev.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/gesv.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/getri.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/potrf.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/spmv.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/symv.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/linpack.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
endif()
|
||||
if (NOT USE_GEMM3M)
|
||||
file(GLOB REMFILE "benchmark/gemm3m.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
endif()
|
||||
foreach(source ${SOURCES})
|
||||
get_filename_component(name ${source} NAME_WE)
|
||||
if ((NOT ${name} STREQUAL "zdot-intel") AND (NOT ${name} STREQUAL "cula_wrapper"))
|
||||
set(defines DEFAULT COMPLEX DOUBLE "COMPLEX\;DOUBLE")
|
||||
foreach(define ${defines})
|
||||
set(target_name "benchmark_${name}")
|
||||
if (NOT "${define}" STREQUAL "DEFAULT")
|
||||
string(JOIN "_" define_str ${define})
|
||||
set(target_name "${target_name}_${define_str}")
|
||||
endif()
|
||||
if ((NOT ${target_name} STREQUAL "benchmark_imax_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_imax_COMPLEX_DOUBLE") AND
|
||||
(NOT ${target_name} STREQUAL "benchmark_imin_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_imin_COMPLEX_DOUBLE") AND
|
||||
(NOT ${target_name} STREQUAL "benchmark_max_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_max_COMPLEX_DOUBLE") AND
|
||||
(NOT ${target_name} STREQUAL "benchmark_min_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_min_COMPLEX_DOUBLE"))
|
||||
add_executable(${target_name} ${source})
|
||||
target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_link_libraries(${target_name} ${OpenBLAS_LIBNAME} )
|
||||
# target_link_libraries(${target_name} ${OpenBLAS_LIBNAME} OpenMP::OpenMP_C)
|
||||
if (NOT "${define}" STREQUAL "DEFAULT")
|
||||
target_compile_definitions(${target_name} PRIVATE ${define})
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
|
||||
# Install project
|
||||
|
||||
# Install libraries
|
||||
if(BUILD_SHARED_LIBS AND BUILD_STATIC_LIBS)
|
||||
install(TARGETS ${OpenBLAS_LIBNAME}_shared
|
||||
EXPORT "OpenBLAS${SUFFIX64}Targets"
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
|
||||
install(TARGETS ${OpenBLAS_LIBNAME}_static
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
|
||||
else()
|
||||
install(TARGETS ${OpenBLAS_LIBS}
|
||||
EXPORT "OpenBLAS${SUFFIX64}Targets"
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
|
||||
endif()
|
||||
|
||||
# Install headers
|
||||
set(CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
|
||||
set(CMAKE_INSTALL_FULL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
message(STATUS "Generating openblas_config.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
|
||||
set(OPENBLAS_CONFIG_H ${CMAKE_BINARY_DIR}/openblas_config.h)
|
||||
file(WRITE ${OPENBLAS_CONFIG_H} "#ifndef OPENBLAS_CONFIG_H\n")
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_CONFIG_H\n")
|
||||
file(STRINGS ${PROJECT_BINARY_DIR}/config.h __lines)
|
||||
foreach(line ${__lines})
|
||||
string(REPLACE "#define " "" line ${line})
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_${line}\n")
|
||||
endforeach()
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_VERSION \"OpenBLAS ${OpenBLAS_VERSION}\"\n")
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS)
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "${OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS}\n")
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "#endif /* OPENBLAS_CONFIG_H */\n")
|
||||
install (FILES ${OPENBLAS_CONFIG_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
if(NOT NOFORTRAN)
|
||||
message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
|
||||
set(F77BLAS_H ${CMAKE_BINARY_DIR}/generated/f77blas.h)
|
||||
file(WRITE ${F77BLAS_H} "#ifndef OPENBLAS_F77BLAS_H\n")
|
||||
file(APPEND ${F77BLAS_H} "#define OPENBLAS_F77BLAS_H\n")
|
||||
file(APPEND ${F77BLAS_H} "#include \"openblas_config.h\"\n")
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h COMMON_INTERFACE_H_CONTENTS)
|
||||
file(APPEND ${F77BLAS_H} "${COMMON_INTERFACE_H_CONTENTS}\n")
|
||||
file(APPEND ${F77BLAS_H} "#endif")
|
||||
install (FILES ${F77BLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
endif()
|
||||
|
||||
if(NOT NO_CBLAS)
|
||||
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
|
||||
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
if (NOT ${SYMBOLPREFIX} STREQUAL "")
|
||||
string(REPLACE " cblas" " ${SYMBOLPREFIX}cblas" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
|
||||
string(REPLACE " openblas" " ${SYMBOLPREFIX}openblas" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
string (REPLACE " ${SYMBOLPREFIX}openblas_complex" " openblas_complex" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
|
||||
string(REPLACE " goto" " ${SYMBOLPREFIX}goto" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
endif()
|
||||
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
|
||||
string(REGEX REPLACE "(cblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
|
||||
string(REGEX REPLACE "(openblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
string(REGEX REPLACE "(openblas_complex[^ ]*)${SYMBOLSUFFIX}" "\\1" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
|
||||
string(REGEX REPLACE "(goto[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
endif()
|
||||
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
|
||||
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
endif()
|
||||
|
||||
if(NOT NO_LAPACKE)
|
||||
message (STATUS "Copying LAPACKE header files to ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
if(BUILD_STATIC_LIBS)
|
||||
add_dependencies( ${OpenBLAS_LIBNAME}_static genlapacke)
|
||||
endif()
|
||||
if(BUILD_SHARED_LIBS)
|
||||
add_dependencies( ${OpenBLAS_LIBNAME}_shared genlapacke)
|
||||
endif()
|
||||
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/*.h")
|
||||
install (FILES ${INCLUDE_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
ADD_CUSTOM_TARGET(genlapacke
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
|
||||
)
|
||||
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
endif()
|
||||
|
||||
# Install pkg-config files
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
|
||||
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
|
||||
|
||||
|
||||
set(PN OpenBLAS)
|
||||
set(CMAKECONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PN}${SUFFIX64}")
|
||||
configure_package_config_file(cmake/${PN}Config.cmake.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake"
|
||||
INSTALL_DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
|
||||
VERSION ${${PN}_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion)
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake
|
||||
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
|
||||
RENAME ${PN}${SUFFIX64}ConfigVersion.cmake
|
||||
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
install(EXPORT "${PN}${SUFFIX64}Targets"
|
||||
NAMESPACE "${PN}${SUFFIX64}::"
|
||||
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
@@ -23,9 +23,6 @@
|
||||
* Optimization on AMD Piledriver
|
||||
* Optimization on Intel Haswell
|
||||
|
||||
* Chris Sidebottom <chris.sidebottom@arm.com>
|
||||
* Optimizations and other improvements targeting AArch64
|
||||
|
||||
## Previous Developers
|
||||
|
||||
* Zaheer Chothia <zaheer.chothia@gmail.com>
|
||||
@@ -124,102 +121,11 @@ In chronological order:
|
||||
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
|
||||
ARMv8 support.
|
||||
|
||||
* Jerome Robert <jeromerobert@gmx.com>
|
||||
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478)
|
||||
* [2015-12-23] `stack_check` in `gemv.c` (bug #722)
|
||||
* [2015-12-28] Allow to force the number of parallel make job
|
||||
* [2015-12-28] Fix detection of AMD E2-3200 detection
|
||||
* [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what expected
|
||||
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731)
|
||||
* [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742)
|
||||
* [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760)
|
||||
* [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727)
|
||||
|
||||
* Dan Kortschak
|
||||
* [2015-01-07] Added test for drotmg bug #484.
|
||||
|
||||
* Ton van den Heuvel <https://github.com/ton>
|
||||
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().
|
||||
|
||||
* Martin Koehler <https://github.com/grisuthedragon/>
|
||||
* [2015-09-07] Improved imatcopy
|
||||
|
||||
* Ashwin Sekhar T K <https://github.com/ashwinyes/>
|
||||
* [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8)
|
||||
* [2015-11-20] lapack-test fixes for Cortex-A57
|
||||
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57
|
||||
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57
|
||||
|
||||
* theoractice <https://github.com/theoractice/>
|
||||
* [2016-03-20] Fix compiler error in VisualStudio with CMake
|
||||
* [2016-03-22] Fix access violation on Windows while static linking
|
||||
|
||||
* Paul Mustière <https://github.com/buffer51/>
|
||||
* [2016-02-04] Fix Android build on ARMV7
|
||||
* [2016-04-26] Android build with LAPACK for ARMV7 & ARMV8
|
||||
|
||||
* Shivraj Patil <https://github.com/sva-img/>
|
||||
* [2016-05-03] DGEMM optimization for MIPS P5600 and I6400 using MSA
|
||||
|
||||
* Kaustubh Raste <https://github.com/ksraste/>
|
||||
* [2016-05-09] DTRSM optimization for MIPS P5600 and I6400 using MSA
|
||||
* [2016-05-20] STRSM optimization for MIPS P5600 and I6400 using MSA
|
||||
|
||||
* Abdelrauf <https://github.com/quickwritereader>
|
||||
* [2017-01-01] dgemm and dtrmm kernels for IBM z13
|
||||
* [2017-02-26] ztrmm kernel for IBM z13
|
||||
* [2017-03-13] strmm and ctrmm kernel for IBM z13
|
||||
* [2017-09-01] initial Blas Level-1,2 (double precision) for IBM z13
|
||||
* [2018-03-07] added missing Blas Level 1-2 (double precision) simd codes
|
||||
* [2019-02-01] added missing Blas Level-1,2 (single precision) simd codes
|
||||
* [2019-03-14] power9 dgemm/dtrmm kernel
|
||||
* [2019-04-29] power9 sgemm/strmm kernel
|
||||
|
||||
* Jiachen Wang <https://github.com/wjc404>
|
||||
* [2019-07-29] optimize AVX2 DGEMM
|
||||
* [2019-10-20] AVX512 DGEMM kernel (4x8)
|
||||
* [2019-11-06] optimize AVX512 SGEMM
|
||||
* [2019-11-12] AVX512 CGEMM & ZGEMM kernels
|
||||
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM
|
||||
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels
|
||||
* [2020-01-07] optimize AVX2 SGEMM and STRMM
|
||||
|
||||
* Rajalakshmi Srinivasaraghavan <https://github.com/RajalakshmiSR>
|
||||
* [2020-04-15] Half-precision GEMM for bfloat16
|
||||
|
||||
* Marius Hillenbrand <https://github.com/mhillenibm>
|
||||
* [2020-05-12] Revise dynamic architecture detection for IBM z
|
||||
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14
|
||||
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support
|
||||
|
||||
* Danfeng Zhang <https://github.com/craft-zhang>
|
||||
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
|
||||
|
||||
* PingTouGe Semiconductor Co., Ltd.
|
||||
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910
|
||||
|
||||
* River Dillon <oss@outerpassage.net>
|
||||
* [2021-07-10] fix compilation with musl libc
|
||||
|
||||
* Bine Brank <https://github.com/binebrank>
|
||||
* [2021-10-27] Add vector-length-agnostic DGEMM kernels for Arm SVE
|
||||
* [2021-11-20] Vector-length-agnostic Arm SVE copy routines for DGEMM, DTRMM, DSYMM
|
||||
* [2021-11-12] SVE kernels for SGEMM, STRMM and corresponding SVE copy functions
|
||||
* [2022-01-06] SVE kernels for CGEMM, ZGEMM, CTRMM, ZTRMM and corresponding SVE copy functions
|
||||
* [2022-01-18] SVE kernels and copy functions for TRSM
|
||||
|
||||
* Ilya Kurdyukov <https://github.com/ilyakurdyukov>
|
||||
* [2021-02-21] Add basic support for the Elbrus E2000 architecture
|
||||
|
||||
* PLCT Lab, Institute of Software Chinese Academy of Sciences
|
||||
* [2022-03] Support RISC-V Vector Intrinisc 1.0 version.
|
||||
|
||||
* Pablo Romero <https://github.com/pablorcum>
|
||||
* [2022-08] Fix building from sources for QNX
|
||||
|
||||
* Mark Seminatore <https://github.com/mseminatore>
|
||||
* [2023-11-09] Improve Windows threading performance scaling
|
||||
* [2024-02-09] Introduce MT_TRACE facility and improve code consistency
|
||||
|
||||
* Dirreke <https://github.com/mseminatore>
|
||||
* [2024-01-16] Add basic support for the CSKY architecture
|
||||
* [Your name or handle] <[email or website]>
|
||||
* [Date] [Brief summary of your changes]
|
||||
|
||||
1624
Changelog.txt
1624
Changelog.txt
File diff suppressed because it is too large
Load Diff
@@ -80,7 +80,7 @@
|
||||
SUN
|
||||
Fujitsu
|
||||
|
||||
4. Supported precision
|
||||
4. Suported precision
|
||||
|
||||
Now x86/x86_64 version support 80bit FP precision in addition to
|
||||
normal double presicion and single precision. Currently only
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
operation is finished.
|
||||
|
||||
|
||||
2. Similar problem may happen under virtual machine. If supervisor
|
||||
2. Simlar problem may happen under virtual machine. If supervisor
|
||||
allocates different cores for each scheduling, BLAS performnace
|
||||
will be bad. This is because BLAS also utilizes all cache,
|
||||
unexpected re-schedule for different core may result of heavy
|
||||
|
||||
14
Jenkinsfile
vendored
14
Jenkinsfile
vendored
@@ -1,14 +0,0 @@
|
||||
pipeline {
|
||||
agent {
|
||||
docker {
|
||||
image 'osuosl/ubuntu-s390x'
|
||||
}
|
||||
}
|
||||
stages {
|
||||
stage('Build') {
|
||||
steps {
|
||||
sh 'make clean && make'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
pipeline {
|
||||
agent {
|
||||
docker {
|
||||
image 'osuosl/ubuntu-ppc64le'
|
||||
}
|
||||
}
|
||||
stages {
|
||||
stage('Build') {
|
||||
steps {
|
||||
sh 'sudo apt update'
|
||||
sh 'sudo apt install gfortran -y'
|
||||
sh 'make clean && make'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
261
Makefile
261
Makefile
@@ -1,9 +1,5 @@
|
||||
TOPDIR = .
|
||||
include ./Makefile.system
|
||||
LNCMD = ln -fs
|
||||
ifeq ($(FIXED_LIBNAME), 1)
|
||||
LNCMD = true
|
||||
endif
|
||||
|
||||
BLASDIRS = interface driver/level2 driver/level3 driver/others
|
||||
|
||||
@@ -11,6 +7,10 @@ ifneq ($(DYNAMIC_ARCH), 1)
|
||||
BLASDIRS += kernel
|
||||
endif
|
||||
|
||||
ifdef UTEST_CHECK
|
||||
SANITY_CHECK = 1
|
||||
endif
|
||||
|
||||
ifdef SANITY_CHECK
|
||||
BLASDIRS += reference
|
||||
endif
|
||||
@@ -20,37 +20,14 @@ ifneq ($(NO_LAPACK), 1)
|
||||
SUBDIRS += lapack
|
||||
endif
|
||||
|
||||
RELA =
|
||||
ifeq ($(BUILD_RELAPACK), 1)
|
||||
RELA = re_lapack
|
||||
endif
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
|
||||
|
||||
ifeq ($(NO_FORTRAN), 1)
|
||||
define NOFORTRAN
|
||||
1
|
||||
endef
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
define C_LAPACK
|
||||
1
|
||||
endef
|
||||
endif
|
||||
export NOFORTRAN
|
||||
export NO_LAPACK
|
||||
export C_LAPACK
|
||||
endif
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
|
||||
ifeq ($(F_COMPILER),CRAY)
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -Og -Os,$(LAPACK_FFLAGS))
|
||||
else
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS))
|
||||
endif
|
||||
.PHONY : all libs netlib test ctest shared install
|
||||
.NOTPARALLEL : all libs prof lapack-test install blas-test
|
||||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test
|
||||
|
||||
.PHONY : all libs netlib $(RELA) test ctest shared install
|
||||
.NOTPARALLEL : shared
|
||||
|
||||
all :: tests
|
||||
all :: libs netlib tests shared
|
||||
@echo
|
||||
@echo " OpenBLAS build complete. ($(LIB_COMPONENTS))"
|
||||
@echo
|
||||
@@ -67,27 +44,10 @@ ifneq ($(INTERFACE64), 0)
|
||||
@echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) "
|
||||
endif
|
||||
endif
|
||||
@$(CC) --version > /dev/null 2>&1;\
|
||||
if [ $$? -eq 0 ]; then \
|
||||
cverinfo=`$(CC) --version | sed -n '1p'`; \
|
||||
if [ -z "$${cverinfo}" ]; then \
|
||||
cverinfo=`$(CC) --version | sed -n '2p'`; \
|
||||
fi; \
|
||||
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
|
||||
else \
|
||||
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
|
||||
fi
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
@$(FC) --version > /dev/null 2>&1;\
|
||||
if [ $$? -eq 0 ]; then \
|
||||
fverinfo=`$(FC) --version | sed -n '1p'`; \
|
||||
if [ -z "$${fverinfo}" ]; then \
|
||||
fverinfo=`$(FC) --version | sed -n '2p'`; \
|
||||
fi; \
|
||||
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
|
||||
else \
|
||||
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\
|
||||
fi
|
||||
|
||||
@echo " C compiler ... $(C_COMPILER) (command line : $(CC))"
|
||||
ifndef NOFORTRAN
|
||||
@echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))"
|
||||
endif
|
||||
ifneq ($(OSNAME), AIX)
|
||||
@echo -n " Library Name ... $(LIBNAME)"
|
||||
@@ -96,13 +56,9 @@ else
|
||||
endif
|
||||
|
||||
ifndef SMP
|
||||
@echo " (Single-threading) "
|
||||
@echo " (Single threaded) "
|
||||
else
|
||||
@echo " (Multi-threading; Max num-threads is $(NUM_THREADS))"
|
||||
endif
|
||||
|
||||
ifeq ($(DYNAMIC_ARCH), 1)
|
||||
@echo " Supporting multiple $(ARCH) cpu models with minimum requirement for the common code being $(CORE)"
|
||||
@echo " (Multi threaded; Max num-threads is $(NUM_THREADS))"
|
||||
endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
@@ -118,10 +74,6 @@ ifeq ($(OSNAME), Darwin)
|
||||
@echo "\"make PREFIX=/your_installation_path/ install\"."
|
||||
@echo
|
||||
@echo "(or set PREFIX in Makefile.rule and run make install."
|
||||
@echo
|
||||
@echo "Note that any flags passed to make during build should also be passed to make install"
|
||||
@echo "to circumvent any install errors."
|
||||
@echo
|
||||
@echo "If you want to move the .dylib to a new location later, make sure you change"
|
||||
@echo "the internal name of the dylib with:"
|
||||
@echo
|
||||
@@ -130,25 +82,25 @@ endif
|
||||
@echo
|
||||
@echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"."
|
||||
@echo
|
||||
@echo "Note that any flags passed to make during build should also be passed to make install"
|
||||
@echo "to circumvent any install errors."
|
||||
@echo
|
||||
|
||||
shared : libs netlib $(RELA)
|
||||
ifneq ($(NO_SHARED), 1)
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
|
||||
shared :
|
||||
ifndef NO_SHARED
|
||||
ifeq ($(OSNAME), Linux)
|
||||
@$(MAKE) -C exports so
|
||||
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
|
||||
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
@$(MAKE) -C exports so
|
||||
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@$(MAKE) -C exports dyn
|
||||
@$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
@$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
||||
@-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@$(MAKE) -C exports dll
|
||||
@@ -156,42 +108,39 @@ endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@$(MAKE) -C exports dll
|
||||
endif
|
||||
ifeq ($(OSNAME), AIX)
|
||||
@$(MAKE) -C exports so
|
||||
endif
|
||||
endif
|
||||
|
||||
tests : shared
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
tests :
|
||||
ifndef NOFORTRAN
|
||||
ifndef TARGET
|
||||
ifndef CROSS
|
||||
touch $(LIBNAME)
|
||||
ifndef NO_FBLAS
|
||||
$(MAKE) -C test all
|
||||
endif
|
||||
endif
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
ifdef UTEST_CHECK
|
||||
$(MAKE) -C utest all
|
||||
endif
|
||||
ifneq ($(NO_CBLAS), 1)
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
endif
|
||||
ifndef NO_CBLAS
|
||||
$(MAKE) -C ctest all
|
||||
endif
|
||||
ifeq ($(CPP_THREAD_SAFETY_TEST), 1)
|
||||
$(MAKE) -C cpp_thread_test all
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
libs :
|
||||
ifeq ($(CORE), UNKNOWN)
|
||||
ifeq ($(CORE), UNKOWN)
|
||||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
$(info OpenBLAS: Detecting fortran compiler failed. Can only compile BLAS and f2c-converted LAPACK.)
|
||||
$(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.)
|
||||
endif
|
||||
ifeq ($(NO_STATIC), 1)
|
||||
ifeq ($(NO_SHARED), 1)
|
||||
$(error OpenBLAS: neither static nor shared are enabled.)
|
||||
endif
|
||||
endif
|
||||
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
@for d in $(SUBDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
@@ -213,36 +162,16 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
||||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
done
|
||||
@echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
ifeq ($(DYNAMIC_OLDER), 1)
|
||||
@echo DYNAMIC_OLDER=1 >> Makefile.conf_last
|
||||
endif
|
||||
endif
|
||||
@echo TARGET=$(CORE) >> Makefile.conf_last
|
||||
ifdef USE_THREAD
|
||||
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
|
||||
endif
|
||||
ifdef SMP
|
||||
ifdef NUM_THREADS
|
||||
@echo NUM_THREADS=$(NUM_THREADS) >> Makefile.conf_last
|
||||
else
|
||||
@echo NUM_THREADS=$(NUM_CORES) >> Makefile.conf_last
|
||||
endif
|
||||
endif
|
||||
ifeq ($(USE_OPENMP),1)
|
||||
@echo USE_OPENMP=1 >> Makefile.conf_last
|
||||
endif
|
||||
ifeq ($(INTERFACE64),1)
|
||||
@echo INTERFACE64=1 >> Makefile.conf_last
|
||||
endif
|
||||
@echo THELIBNAME=$(LIBNAME) >> Makefile.conf_last
|
||||
@echo THELIBSONAME=$(LIBSONAME) >> Makefile.conf_last
|
||||
@-$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
@touch lib.grd
|
||||
|
||||
prof : prof_blas prof_lapack
|
||||
|
||||
prof_blas :
|
||||
$(LNCMD) $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
|
||||
for d in $(SUBDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d prof || exit 1 ; \
|
||||
@@ -253,7 +182,7 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
||||
endif
|
||||
|
||||
blas :
|
||||
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
for d in $(BLASDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d libs || exit 1 ; \
|
||||
@@ -261,7 +190,7 @@ blas :
|
||||
done
|
||||
|
||||
hpl :
|
||||
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
for d in $(BLASDIRS) ../laswp exports ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
@@ -275,77 +204,55 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
||||
endif
|
||||
|
||||
hpl_p :
|
||||
$(LNCMD) $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
|
||||
for d in $(SUBDIRS) ../laswp exports ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
netlib :
|
||||
|
||||
else
|
||||
netlib : lapack_prebuild
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
ifndef NOFORTRAN
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
endif
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
ifndef NO_LAPACKE
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
|
||||
endif
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
re_lapack :
|
||||
|
||||
else
|
||||
re_lapack :
|
||||
@$(MAKE) -C relapack
|
||||
endif
|
||||
|
||||
prof_lapack : lapack_prebuild
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof
|
||||
|
||||
lapack_prebuild :
|
||||
ifeq ($(NO_LAPACK), $(filter 0,$(NO_LAPACK)))
|
||||
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
-@echo "override FFLAGS = $(LAPACK_FFLAGS) -fno-tree-vectorize" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "override FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifndef NOFORTRAN
|
||||
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGGFORTRAN1)
|
||||
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGIBM1)
|
||||
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
endif
|
||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB = ../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "TMGLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKELIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifeq ($(FC), gfortran)
|
||||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifdef SMP
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else ifeq ($(OSNAME), Haiku)
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
else
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
@@ -353,68 +260,40 @@ else
|
||||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
|
||||
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_SINGLE), 1)
|
||||
-@echo "BUILD_SINGLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_DOUBLE), 1)
|
||||
-@echo "BUILD_DOUBLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_COMPLEX), 1)
|
||||
-@echo "BUILD_COMPLEX = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_COMPLEX16), 1)
|
||||
-@echo "BUILD_COMPLEX16 = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
|
||||
large.tgz :
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
if [ ! -a $< ]; then
|
||||
-wget http://www.netlib.org/lapack/timing/large.tgz;
|
||||
fi
|
||||
endif
|
||||
|
||||
timing.tgz :
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
if [ ! -a $< ]; then
|
||||
-wget http://www.netlib.org/lapack/timing/timing.tgz;
|
||||
fi
|
||||
endif
|
||||
|
||||
lapack-timing : large.tgz timing.tgz
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
|
||||
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||
make -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||
endif
|
||||
|
||||
|
||||
lapack-test :
|
||||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
ifneq ($(CROSS), 1)
|
||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; $(MAKE) all; ./testlsame; ./testslamch; ./testdlamch; \
|
||||
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING)
|
||||
endif
|
||||
|
||||
lapack-runtest: lapack-test
|
||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING )
|
||||
|
||||
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||
|
||||
blas-test:
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out)
|
||||
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out)
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
|
||||
make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)
|
||||
|
||||
|
||||
dummy :
|
||||
@@ -432,16 +311,14 @@ clean ::
|
||||
@$(MAKE) -C kernel clean
|
||||
#endif
|
||||
@$(MAKE) -C reference clean
|
||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0
|
||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@rm -rf getarch.dSYM getarch_2nd.dSYM
|
||||
endif
|
||||
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
|
||||
@rm -f cblas.tmp cblas.tmp2
|
||||
@touch $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h
|
||||
@$(MAKE) -C relapack clean
|
||||
@rm -f *.grd Makefile.conf_last config_last.h
|
||||
@(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt)
|
||||
@echo Done.
|
||||
|
||||
@@ -1,24 +1,42 @@
|
||||
CPP = $(CC) -E
|
||||
RANLIB = ranlib
|
||||
|
||||
ifeq ($(LIBSUBARCH), EV4)
|
||||
LIBNAME = $(LIBPREFIX)_ev4.a
|
||||
LIBNAME_P = $(LIBPREFIX)_ev4_p.a
|
||||
endif
|
||||
|
||||
ifeq ($(LIBSUBARCH), EV5)
|
||||
LIBNAME = $(LIBPREFIX)_ev5.a
|
||||
LIBNAME_P = $(LIBPREFIX)_ev5_p.a
|
||||
endif
|
||||
|
||||
ifeq ($(LIBSUBARCH), EV6)
|
||||
LIBNAME = $(LIBPREFIX)_ev6.a
|
||||
LIBNAME_P = $(LIBPREFIX)_ev6_p.a
|
||||
endif
|
||||
|
||||
ifneq ($(COMPILER), NATIVE)
|
||||
# GCC User
|
||||
ifeq ($(CORE), EV4)
|
||||
CCOMMON_OPT += -mcpu=ev4
|
||||
ifeq ($(LIBSUBARCH), EV4)
|
||||
OPTION += -DEV4 -mcpu=ev4
|
||||
endif
|
||||
ifeq ($(CORE), EV5)
|
||||
CCOMMON_OPT += -mcpu=ev5
|
||||
ifeq ($(LIBSUBARCH), EV5)
|
||||
OPTION += -DEV5 -mcpu=ev5
|
||||
endif
|
||||
ifeq ($(CORE), EV6)
|
||||
CCOMMON_OPT += -mcpu=ev6
|
||||
ifeq ($(LIBSUBARCH), EV6)
|
||||
OPTION += -DEV6 -mcpu=ev6
|
||||
endif
|
||||
else
|
||||
# Compaq Compiler User
|
||||
ifeq ($(CORE), EV4)
|
||||
CCOMMON_OPT += -tune ev4 -arch ev4
|
||||
ifeq ($(LIBSUBARCH), EV4)
|
||||
OPTION += -DEV4 -tune ev4 -arch ev4
|
||||
endif
|
||||
ifeq ($(CORE), EV5)
|
||||
CCOMMON_OPT += -tune ev5 -arch ev5
|
||||
ifeq ($(LIBSUBARCH), EV5)
|
||||
OPTION += -DEV5 -tune ev5 -arch ev5
|
||||
endif
|
||||
ifeq ($(CORE), EV6)
|
||||
CCOMMON_OPT += -tune ev6 -arch ev6
|
||||
ifeq ($(LIBSUBARCH), EV6)
|
||||
OPTION += -DEV6 -tune ev6 -arch ev6
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
34
Makefile.arm
34
Makefile.arm
@@ -1,19 +1,33 @@
|
||||
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
|
||||
# ifeq logical or
|
||||
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
|
||||
ifeq ($(OSNAME), Android)
|
||||
CCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||
FCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
else
|
||||
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
||||
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV7)
|
||||
ifeq ($(OSNAME), Android)
|
||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
else
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV6)
|
||||
CCOMMON_OPT += -mfpu=vfp
|
||||
FCOMMON_OPT += -mfpu=vfp
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
endif
|
||||
|
||||
ifdef HAVE_NEON
|
||||
CCOMMON_OPT += -mfpu=neon
|
||||
FCOMMON_OPT += -mfpu=neon
|
||||
|
||||
ifeq ($(CORE), ARMV5)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
endif
|
||||
|
||||
|
||||
|
||||
330
Makefile.arm64
330
Makefile.arm64
@@ -1,337 +1,7 @@
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
ISCLANG=1
|
||||
endif
|
||||
ifeq ($(C_COMPILER), FUJITSU)
|
||||
ISCLANG=1
|
||||
endif
|
||||
ifneq (1, $(filter 1,$(GCCVERSIONGT4) $(ISCLANG)))
|
||||
CCOMMON_OPT += -march=armv8-a
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a
|
||||
endif
|
||||
|
||||
|
||||
else
|
||||
|
||||
|
||||
ifeq ($(CORE), ARMV8)
|
||||
CCOMMON_OPT += -march=armv8-a
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV8SVE)
|
||||
CCOMMON_OPT += -march=armv8-a+sve
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a+sve
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA53)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA57)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA72)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA73)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA76)
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), FT2000)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
|
||||
# Use a72 tunings because Neoverse-N1 is only available
|
||||
# in GCC>=9
|
||||
ifeq ($(CORE), NEOVERSEN1)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Use a72 tunings because Neoverse-V1 is only available
|
||||
# in GCC>=10.4
|
||||
ifeq ($(CORE), NEOVERSEV1)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifeq (1, $(ISCLANG))
|
||||
CCOMMON_OPT += -mtune=cortex-x1
|
||||
else
|
||||
CCOMMON_OPT += -mtune=neoverse-v1
|
||||
endif
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifneq ($(CROSS), 1)
|
||||
CCOMMON_OPT += -mtune=native
|
||||
endif
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a
|
||||
ifneq ($(CROSS), 1)
|
||||
FCOMMON_OPT += -mtune=native
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8-a+sve -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Use a72 tunings because Neoverse-N2 is only available
|
||||
# in GCC>=10.4
|
||||
ifeq ($(CORE), NEOVERSEN2)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifneq ($(OSNAME), Darwin)
|
||||
CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72
|
||||
endif
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.5-a+sve
|
||||
ifneq ($(CROSS), 1)
|
||||
CCOMMON_OPT += -mtune=native
|
||||
endif
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.5-a
|
||||
ifneq ($(CROSS), 1)
|
||||
FCOMMON_OPT += -mtune=native
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8-a+sve -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Use a53 tunings because a55 is only available in GCC>=8.1
|
||||
ifeq ($(CORE), CORTEXA55)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ8) $(ISCLANG)))
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
|
||||
endif
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), FALKOR)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=falkor
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=falkor
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX2T99)
|
||||
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX3T110)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
|
||||
CCOMMON_OPT += -march=armv8.3-a
|
||||
ifeq (0, $(ISCLANG))
|
||||
CCOMMON_OPT += -mtune=thunderx3t110
|
||||
else
|
||||
CCOMMON_OPT += -mtune=thunderx2t99
|
||||
endif
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), VORTEX)
|
||||
CCOMMON_OPT += -march=armv8.3-a
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.3-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
|
||||
ifeq ($(CORE), TSV110)
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
|
||||
ifeq ($(CORE), EMAG8180)
|
||||
CCOMMON_OPT += -march=armv8-a
|
||||
ifeq ($(ISCLANG), 0)
|
||||
CCOMMON_OPT += -mtune=emag
|
||||
endif
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=emag
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), A64FX)
|
||||
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=a64fx
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a+sve -mtune=a64fx
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXX1)
|
||||
CCOMMON_OPT += -march=armv8.2-a
|
||||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ12) $(ISCLANG)))
|
||||
CCOMMON_OPT += -mtune=cortex-x1
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-x1
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXX2)
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a+sve
|
||||
endif
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
|
||||
CCOMMON_OPT += -mtune=cortex-x2
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -mtune=cortex-x2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
#ifeq (1, $(filter 1,$(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXA510)
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a+sve
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXA710)
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a+sve
|
||||
endif
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
|
||||
CCOMMON_OPT += -mtune=cortex-a710
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -mtune=cortex-a710
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
ifeq ($(CORE), CK860FV)
|
||||
CCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
|
||||
FCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float -static
|
||||
endif
|
||||
@@ -1 +0,0 @@
|
||||
COPT = -Wall -O2 # -DGEMMTEST
|
||||
218
Makefile.install
218
Makefile.install
@@ -2,18 +2,6 @@ TOPDIR = .
|
||||
export GOTOBLAS_MAKEFILE = 1
|
||||
-include $(TOPDIR)/Makefile.conf_last
|
||||
include ./Makefile.system
|
||||
LNCMD = ln -fs
|
||||
|
||||
ifdef THELIBNAME
|
||||
LIBNAME=$(THELIBNAME)
|
||||
LIBSONAME=$(THELIBSONAME)
|
||||
endif
|
||||
ifeq ($(FIXED_LIBNAME), 1)
|
||||
LNCMD = true
|
||||
endif
|
||||
ifeq ($(INTERFACE64),1)
|
||||
USE_64BITINT=1
|
||||
endif
|
||||
|
||||
PREFIX ?= /opt/OpenBLAS
|
||||
|
||||
@@ -21,23 +9,8 @@ OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
|
||||
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
|
||||
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
|
||||
OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/$(LIBSONAMEBASE)
|
||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
|
||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
||||
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
|
||||
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
|
||||
PKG_EXTRALIB := $(EXTRALIB)
|
||||
ifeq ($(INTERFACE64),1)
|
||||
SUFFIX64=64
|
||||
endif
|
||||
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifeq ($(C_COMPILER), PGI)
|
||||
PKG_EXTRALIB += -lomp
|
||||
else
|
||||
PKG_EXTRALIB += -lgomp
|
||||
endif
|
||||
endif
|
||||
|
||||
.PHONY : install
|
||||
.NOTPARALLEL : install
|
||||
@@ -46,173 +19,98 @@ lib.grd :
|
||||
$(error OpenBLAS: Please run "make" firstly)
|
||||
|
||||
install : lib.grd
|
||||
@-mkdir -p "$(DESTDIR)$(PREFIX)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
||||
@-mkdir -p $(DESTDIR)$(PREFIX)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
#for inc
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@echo \#define OPENBLAS_CONFIG_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@cat openblas_config_template.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
|
||||
@echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@echo \#ifndef OPENBLAS_F77BLAS_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@echo \#define OPENBLAS_F77BLAS_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@echo \#include \"openblas_config.h\" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@cat common_interface.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@echo \#endif >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@echo \#ifndef OPENBLAS_F77BLAS_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#define OPENBLAS_F77BLAS_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#include \"openblas_config.h\" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@cat common_interface.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#endif >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
|
||||
ifndef NO_CBLAS
|
||||
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@cp cblas.h cblas.tmp
|
||||
ifdef SYMBOLPREFIX
|
||||
@sed 's/cblas[^( ]*/$(SYMBOLPREFIX)&/g' cblas.tmp > cblas.tmp2
|
||||
@sed 's/openblas[^( ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
|
||||
#change back any openblas_complex_float and double that got hit
|
||||
@sed 's/$(SYMBOLPREFIX)openblas_complex_/openblas_complex_/g' cblas.tmp > cblas.tmp2
|
||||
@sed 's/goto[^( ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
|
||||
endif
|
||||
ifdef SYMBOLSUFFIX
|
||||
@sed 's/cblas[^( ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp > cblas.tmp2
|
||||
@sed 's/openblas[^( ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
|
||||
#change back any openblas_complex_float and double that got hit
|
||||
@sed 's/\(openblas_complex_\)\([^ ]*\)$(SYMBOLSUFFIX)/\1\2 /g' cblas.tmp > cblas.tmp2
|
||||
@sed 's/goto[^( ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
|
||||
endif
|
||||
@sed 's/common/openblas_config/g' cblas.tmp > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
|
||||
@sed 's/common/openblas_config/g' cblas.h > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h
|
||||
endif
|
||||
|
||||
ifneq ($(OSNAME), AIX)
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
|
||||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
|
||||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
|
||||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
ifneq ($(NO_STATIC),1)
|
||||
ifndef NO_STATIC
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@install -m644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
endif
|
||||
#for install shared library
|
||||
ifneq ($(NO_SHARED),1)
|
||||
ifndef NO_SHARED
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
|
||||
@install -m755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
ifeq ($(OSNAME), Linux)
|
||||
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
|
||||
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@-install_name_tool -id "$(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).$(MAJOR_VERSION).dylib" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).dylib ; \
|
||||
$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
||||
@-cp $(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-install_name_tool -id $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||
@-cp $(IMPLIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@-cp $(LIBDLLNAME) $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
||||
@-cp $(LIBDLLNAME).a $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||
@-cp $(IMPLIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
endif
|
||||
endif
|
||||
|
||||
else
|
||||
#install on AIX has different options syntax
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
ifneq ($(NO_STATIC),1)
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
endif
|
||||
#for install shared library
|
||||
ifneq ($(NO_SHARED),1)
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
#Generating openblas.pc
|
||||
ifeq ($(INTERFACE64),1)
|
||||
SUFFIX64=64
|
||||
endif
|
||||
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"
|
||||
|
||||
@echo Generating $(LIBSONAMEBASE)$(SUFFIX64).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
||||
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(PKGFILE)"
|
||||
@echo 'libprefix='$(LIBNAMEPREFIX) >> "$(PKGFILE)"
|
||||
@echo 'libnamesuffix='$(LIBNAMESUFFIX) >> "$(PKGFILE)"
|
||||
@echo 'libsuffix='$(SYMBOLSUFFIX) >> "$(PKGFILE)"
|
||||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(PKGFILE)"
|
||||
@echo 'openblas_config= USE_64BITINT='$(INTERFACE64) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(PKGFILE)"
|
||||
@echo 'version='$(VERSION) >> "$(PKGFILE)"
|
||||
@echo 'extralib='$(PKG_EXTRALIB) >> "$(PKGFILE)"
|
||||
@cat openblas.pc.in >> "$(PKGFILE)"
|
||||
|
||||
|
||||
#Generating OpenBLASConfig.cmake
|
||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
|
||||
ifneq ($(NO_SHARED),1)
|
||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
ifndef NO_SHARED
|
||||
#ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX)$(SYMBOLSUFFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
else
|
||||
#only static
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
#Generating OpenBLASConfigVersion.cmake
|
||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo "else ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo Install OK!
|
||||
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
ifdef BINARY64
|
||||
else
|
||||
endif
|
||||
@@ -1,4 +0,0 @@
|
||||
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs
|
||||
ifdef BINARY64
|
||||
else
|
||||
endif
|
||||
@@ -1,4 +1,3 @@
|
||||
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs
|
||||
ifdef BINARY64
|
||||
else
|
||||
endif
|
||||
|
||||
144
Makefile.power
144
Makefile.power
@@ -1,122 +1,4 @@
|
||||
|
||||
ifdef USE_THREAD
|
||||
ifeq ($(USE_THREAD), 0)
|
||||
USE_OPENMP = 0
|
||||
else
|
||||
USE_OPENMP = 1
|
||||
endif
|
||||
else
|
||||
USE_OPENMP = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER10)
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
|
||||
else ifneq ($(GCCVERSIONGT4), 1)
|
||||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
|
||||
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
|
||||
else
|
||||
$(warning your compiler is too old to fully support POWER10, getting a newer version of gcc is recommended)
|
||||
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
|
||||
endif
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize
|
||||
else
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER9)
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
ifneq ($(GCCVERSIONGT4), 1)
|
||||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
|
||||
CCOMMON_OPT += -mcpu=power8 -mtune=power8
|
||||
else
|
||||
CCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
|
||||
endif
|
||||
ifneq ($(F_COMPILER), PGI)
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr9 -qtune=pwr9 -qfloat=nomaf -qzerosize
|
||||
else
|
||||
FCOMMON_OPT += -O2 -frecursive -fno-fast-math -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifneq ($(GCCVERSIONGT4), 1)
|
||||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
|
||||
FCOMMON_OPT += -mcpu=power8 -mtune=power8
|
||||
else
|
||||
FCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -O2 -Mrecursive
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER8)
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
|
||||
else
|
||||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
|
||||
endif
|
||||
ifneq ($(F_COMPILER), PGI)
|
||||
ifeq ($(OSNAME), AIX)
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
|
||||
else
|
||||
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
|
||||
endif
|
||||
else
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
|
||||
else
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
|
||||
endif
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -O2 -Mrecursive
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CCOMMON_OPT += -DUSE_OPENMP -fopenmp
|
||||
else
|
||||
CCOMMON_OPT += -DUSE_OPENMP -mp
|
||||
endif
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -DUSE_OPENMP
|
||||
else
|
||||
ifneq ($(F_COMPILER), PGI)
|
||||
FCOMMON_OPT += -DUSE_OPENMP -fopenmp
|
||||
else
|
||||
FCOMMON_OPT += -DUSE_OPENMP -mp
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
CCOMMON_OPT += -fno-integrated-as
|
||||
endif
|
||||
# workaround for C->FORTRAN ABI violation in LAPACKE
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
FCOMMON_OPT += -fno-optimize-sibling-calls
|
||||
endif
|
||||
# CCOMMON_OPT += -DALLOC_SHM
|
||||
|
||||
FLAMEPATH = $(HOME)/flame/lib
|
||||
|
||||
@@ -134,38 +16,14 @@ else
|
||||
endif
|
||||
endif
|
||||
|
||||
#Either uncomment below line or run make with `USE_MASS=1` to enable support of MASS library
|
||||
#USE_MASS = 1
|
||||
|
||||
ifeq ($(USE_MASS), 1)
|
||||
# Path to MASS libs, change it if the libs are installed at any other location
|
||||
MASSPATH = /opt/ibm/xlmass/8.1.5/lib
|
||||
COMMON_OPT += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations -DUSE_MASS
|
||||
EXTRALIB += -L$(MASSPATH) -lmass -lmassvp8 -lmass_simdp8
|
||||
endif
|
||||
|
||||
ifdef BINARY64
|
||||
|
||||
|
||||
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), GCCIBMAIX)
|
||||
$(error Using GCC and XLF on AIX is not a supported combination.)
|
||||
endif
|
||||
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), CLANGGFORTRANAIX)
|
||||
$(error Using Clang and gFortran on AIX is not a supported combination.)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -mpowerpc64 -maix64
|
||||
else
|
||||
CCOMMON_OPT += -m64
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), g77)
|
||||
FCOMMON_OPT += -mpowerpc64 -maix64
|
||||
endif
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
FCOMMON_OPT += -mpowerpc64 -maix64
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), xlf)
|
||||
FCOMMON_OPT += -q64
|
||||
endif
|
||||
|
||||
@@ -3,10 +3,6 @@
|
||||
export BINARY
|
||||
export USE_OPENMP
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
override HOST_CFLAGS += -DDYNAMIC_ARCH
|
||||
endif
|
||||
|
||||
ifdef TARGET_CORE
|
||||
TARGET_MAKE = Makefile_kernel.conf
|
||||
TARGET_CONF = config_kernel.h
|
||||
@@ -15,74 +11,20 @@ TARGET_MAKE = Makefile.conf
|
||||
TARGET_CONF = config.h
|
||||
endif
|
||||
|
||||
ifdef USE_PERL
|
||||
SCRIPTSUFFIX = .pl
|
||||
else
|
||||
SCRIPTSUFFIX =
|
||||
endif
|
||||
|
||||
# CPUIDEMU = ../../cpuid/table.o
|
||||
|
||||
ifdef CPUIDEMU
|
||||
EXFLAGS = -DCPUIDEMU -DVENDOR=99
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), MIPS24K)
|
||||
TARGET_FLAGS = -mips32r2
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), MIPS1004K)
|
||||
TARGET_FLAGS = -mips32r2
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), P5600)
|
||||
TARGET_FLAGS = -mips32r5
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), I6400)
|
||||
TARGET_FLAGS = -mips64r6
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), P6600)
|
||||
TARGET_FLAGS = -mips64r6
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), I6500)
|
||||
TARGET_FLAGS = -mips64r6
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), C910V)
|
||||
TARGET_FLAGS = -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), CK860FV)
|
||||
TARGET_FLAGS = -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), x280)
|
||||
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), RISCV64_ZVL256B)
|
||||
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), RISCV64_ZVL128B)
|
||||
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), RISCV64_GENERIC)
|
||||
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
|
||||
endif
|
||||
|
||||
all: getarch_2nd
|
||||
./getarch_2nd 0 >> $(TARGET_MAKE)
|
||||
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||
|
||||
$(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch
|
||||
./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(CC)" $(TARGET_FLAGS) $(CFLAGS)
|
||||
config.h : c_check f_check getarch
|
||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC)
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(FC)" $(TARGET_FLAGS)
|
||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC)
|
||||
else
|
||||
#When we only build CBLAS, we set NOFORTRAN=2
|
||||
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
||||
@@ -97,17 +39,13 @@ endif
|
||||
|
||||
|
||||
getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
|
||||
avx512=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \
|
||||
rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \
|
||||
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
||||
$(HOSTCC) $(CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
||||
|
||||
getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy
|
||||
getarch_2nd : getarch_2nd.c config.h dummy
|
||||
ifndef TARGET_CORE
|
||||
$(HOSTCC) -I. $(HOST_CFLAGS) -o $(@F) getarch_2nd.c
|
||||
$(HOSTCC) -I. $(CFLAGS) -o $(@F) getarch_2nd.c
|
||||
else
|
||||
$(HOSTCC) -I. $(HOST_CFLAGS) -DBUILD_KERNEL -o $(@F) getarch_2nd.c
|
||||
$(HOSTCC) -I. $(CFLAGS) -DBUILD_KERNEL -o $(@F) getarch_2nd.c
|
||||
endif
|
||||
|
||||
dummy:
|
||||
|
||||
.PHONY: dummy
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
ifeq ($(CORE), C910V)
|
||||
CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920
|
||||
FCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -static
|
||||
endif
|
||||
ifeq ($(CORE), x280)
|
||||
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
|
||||
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
|
||||
endif
|
||||
ifeq ($(CORE), RISCV64_ZVL256B)
|
||||
CCOMMON_OPT += -march=rv64imafdcv_zvl256b -mabi=lp64d
|
||||
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -static
|
||||
endif
|
||||
ifeq ($(CORE), RISCV64_ZVL128B)
|
||||
CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
|
||||
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -static
|
||||
endif
|
||||
ifeq ($(CORE), RISCV64_GENERIC)
|
||||
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
|
||||
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static
|
||||
endif
|
||||
207
Makefile.rule
207
Makefile.rule
@@ -3,12 +3,7 @@
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.3.27
|
||||
|
||||
# If you set this prefix, the library name will be lib$(LIBNAMESUFFIX)openblas.a
|
||||
# and lib$(LIBNAMESUFFIX)openblas.so, with a matching soname in the shared library
|
||||
#
|
||||
# LIBNAMEPREFIX = scipy
|
||||
VERSION = 0.2.14
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
@@ -22,11 +17,6 @@ VERSION = 0.3.27
|
||||
# If you want to support multiple architecture in one binary
|
||||
# DYNAMIC_ARCH = 1
|
||||
|
||||
# If you want the full list of x86_64 architectures supported in DYNAMIC_ARCH
|
||||
# mode (including individual optimizied codes for PENRYN, DUNNINGTON, OPTERON,
|
||||
# OPTERON_SSE3, ATOM and NANO rather than fallbacks to older architectures)
|
||||
# DYNAMIC_OLDER = 1
|
||||
|
||||
# C compiler including binary type(32bit / 64bit). Default is gcc.
|
||||
# Don't use Intel Compiler or PGI, it won't generate right codes as I expect.
|
||||
# CC = gcc
|
||||
@@ -53,8 +43,6 @@ VERSION = 0.3.27
|
||||
# HOSTCC = gcc
|
||||
|
||||
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
|
||||
# Please note that AVX is not available on 32-bit.
|
||||
# Setting BINARY=32 disables AVX/AVX2/AVX-512.
|
||||
# BINARY=64
|
||||
|
||||
# About threaded BLAS. It will be automatically detected if you don't
|
||||
@@ -63,95 +51,40 @@ VERSION = 0.3.27
|
||||
# For force setting for multi threaded, specify USE_THREAD = 1
|
||||
# USE_THREAD = 0
|
||||
|
||||
# If you want to build a single-threaded OpenBLAS, but expect to call this
|
||||
# from several concurrent threads in some other program, comment this in for
|
||||
# thread safety. (This is done automatically for USE_THREAD=1 , and should not
|
||||
# be necessary when USE_OPENMP=1)
|
||||
# USE_LOCKING = 1
|
||||
|
||||
# If you're going to use this library with OpenMP, please comment it in.
|
||||
# This flag is always set for POWER8. Don't set USE_OPENMP = 0 if you're targeting POWER8.
|
||||
# USE_OPENMP = 1
|
||||
|
||||
# The OpenMP scheduler to use - by default this is "static" and you
|
||||
# will normally not want to change this unless you know that your main
|
||||
# workload will involve tasks that have highly unbalanced running times
|
||||
# for individual threads. Changing away from "static" may also adversely
|
||||
# affect memory access locality in NUMA systems. Setting to "runtime" will
|
||||
# allow you to select the scheduler from the environment variable OMP_SCHEDULE
|
||||
# CCOMMON_OPT += -DOMP_SCHED=dynamic
|
||||
|
||||
# You can define the maximum number of threads. Basically it should be less
|
||||
# than or equal to the number of CPU threads. If you don't specify one, it's
|
||||
# automatically detected by the build system.
|
||||
# If SMT (aka. HT) is enabled on the system, it may or may not be beneficial to
|
||||
# restrict NUM_THREADS to the number of physical cores. By default, the automatic
|
||||
# detection includes logical CPUs, thus allowing the use of SMT.
|
||||
# Users may opt at runtime to use less than NUM_THREADS threads.
|
||||
#
|
||||
# Note for package maintainers: you can build OpenBLAS with a large NUM_THREADS
|
||||
# value (eg. 32-256) if you expect your users to use that many threads. Due to the way
|
||||
# some internal structures are allocated, using a large NUM_THREADS value has a RAM
|
||||
# footprint penalty, even if users reduce the actual number of threads at runtime.
|
||||
# You can define maximum number of threads. Basically it should be
|
||||
# less than actual number of cores. If you don't specify one, it's
|
||||
# automatically detected by the the script.
|
||||
# NUM_THREADS = 24
|
||||
|
||||
# If you have enabled USE_OPENMP and your application would call
|
||||
# OpenBLAS's calculation API from multiple threads, please comment this in.
|
||||
# This flag defines how many instances of OpenBLAS's calculation API can actually
|
||||
# run in parallel. If more than NUM_PARALLEL threads call OpenBLAS's calculation API,
|
||||
# they need to wait for the preceding API calls to finish or risk data corruption.
|
||||
# NUM_PARALLEL = 2
|
||||
|
||||
# When multithreading, OpenBLAS needs to use a memory buffer for communicating
|
||||
# and collating results for individual subranges of the original matrix. Since
|
||||
# the original GotoBLAS of the early 2000s, the default size of this buffer has
|
||||
# been set at a value of 32<<20 (which is 32MB) on x86_64 , twice that on PPC.
|
||||
# If you expect to handle large problem sizes (beyond about 30000x30000) uncomment
|
||||
# this line and adjust the (32<<n) factor if necessary. Usually an insufficient value
|
||||
# manifests itself as a crash in the relevant scal kernel (sscal_k, dscal_k etc)
|
||||
# BUFFERSIZE = 25
|
||||
|
||||
# If you don't need to install the static library, please comment this in.
|
||||
# if you don't need to install the static library, please comment it in.
|
||||
# NO_STATIC = 1
|
||||
|
||||
# If you don't need to generate the shared library, please comment this in.
|
||||
# if you don't need generate the shared library, please comment it in.
|
||||
# NO_SHARED = 1
|
||||
|
||||
# If you don't need the CBLAS interface, please comment this in.
|
||||
# If you don't need CBLAS interface, please comment it in.
|
||||
# NO_CBLAS = 1
|
||||
|
||||
# If you only want the CBLAS interface without installing a Fortran compiler,
|
||||
# please comment this in.
|
||||
# If you only want CBLAS interface without installing Fortran compiler,
|
||||
# please comment it in.
|
||||
# ONLY_CBLAS = 1
|
||||
|
||||
# If you don't need LAPACK, please comment this in.
|
||||
# If you set NO_LAPACK=1, the build system automatically sets NO_LAPACKE=1.
|
||||
# If you don't need LAPACK, please comment it in.
|
||||
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
|
||||
# NO_LAPACK = 1
|
||||
|
||||
# If you don't need LAPACKE (C Interface to LAPACK), please comment this in.
|
||||
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
|
||||
# NO_LAPACKE = 1
|
||||
|
||||
# Build LAPACK Deprecated functions since LAPACK 3.6.0
|
||||
BUILD_LAPACK_DEPRECATED = 1
|
||||
|
||||
# Build RecursiveLAPACK on top of LAPACK
|
||||
# BUILD_RELAPACK = 1
|
||||
# Have RecursiveLAPACK actually replace standard LAPACK routines instead of
|
||||
# just adding its equivalents with a RELAPACK_ prefix
|
||||
# RELAPACK_REPLACE = 1
|
||||
|
||||
# If you want to use the legacy threaded Level 3 implementation.
|
||||
# If you want to use legacy threaded Level 3 implementation.
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
|
||||
# If you want to use the new, still somewhat experimental code that uses
|
||||
# thread-local storage instead of a central memory buffer in memory.c
|
||||
# Note that if your system uses GLIBC, it needs to have at least glibc 2.21
|
||||
# for this to work.
|
||||
# USE_TLS = 1
|
||||
|
||||
# If you want to drive whole 64bit region by BLAS. Not all Fortran
|
||||
# compilers support this. It's safe to keep this commented out if you
|
||||
# are not sure. (This is equivalent to the "-i8" ifort option).
|
||||
# compiler supports this. It's safe to keep comment it out if you
|
||||
# are not sure(equivalent to "-i8" option).
|
||||
# INTERFACE64 = 1
|
||||
|
||||
# Unfortunately most of kernel won't give us high quality buffer.
|
||||
@@ -159,18 +92,10 @@ BUILD_LAPACK_DEPRECATED = 1
|
||||
# but it will consume time. If you don't like it, you can disable one.
|
||||
NO_WARMUP = 1
|
||||
|
||||
# Comment this in if you want to disable OpenBLAS's CPU/Memory affinity handling.
|
||||
# This feature is only implemented on Linux, and is always disabled on other platforms.
|
||||
# Enabling affinity handling may improve performance, especially on NUMA systems, but
|
||||
# it may conflict with certain applications that also try to manage affinity.
|
||||
# This conflict can result in threads of the application calling OpenBLAS ending up locked
|
||||
# to the same core(s) as OpenBLAS, possibly binding all threads to a single core.
|
||||
# For this reason, affinity handling is disabled by default. Can be safely enabled if nothing
|
||||
# else modifies affinity settings.
|
||||
# Note: enabling affinity has been known to cause problems with NumPy and R
|
||||
# If you want to disable CPU/Memory affinity on Linux.
|
||||
NO_AFFINITY = 1
|
||||
|
||||
# If you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
|
||||
# if you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
|
||||
# BIGNUMA = 1
|
||||
|
||||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
||||
@@ -180,67 +105,55 @@ NO_AFFINITY = 1
|
||||
# Don't use Haswell optimizations if binutils is too old (e.g. RHEL6)
|
||||
# NO_AVX2 = 1
|
||||
|
||||
# Don't use SkylakeX optimizations if binutils or compiler are too old (the build
|
||||
# system will try to determine this automatically)
|
||||
# NO_AVX512 = 1
|
||||
|
||||
# Don't use parallel make.
|
||||
# NO_PARALLEL_MAKE = 1
|
||||
|
||||
# Force number of make jobs. The default is the number of logical CPU of the host.
|
||||
# This is particularly useful when using distcc.
|
||||
# A negative value will disable adding a -j flag to make, allowing to use a parent
|
||||
# make -j value. This is useful to call OpenBLAS make from an other project
|
||||
# makefile
|
||||
# MAKE_NB_JOBS = 2
|
||||
|
||||
# If you would like to know minute performance report of GotoBLAS.
|
||||
# FUNCTION_PROFILE = 1
|
||||
|
||||
# Support for IEEE quad precision(it's *real* REAL*16)( under testing)
|
||||
# This option should not be used - it is a holdover from unfinished code present
|
||||
# in the original GotoBLAS2 library that may be usable as a starting point but
|
||||
# is not even expected to compile in its present form.
|
||||
# QUAD_PRECISION = 1
|
||||
|
||||
# Support for integer matrix and vector (e.g. iaxpy)
|
||||
# INTEGER_PRECISION = 1
|
||||
|
||||
# Theads are still working for a while after finishing BLAS operation
|
||||
# to reduce thread activate/deactivate overhead. You can determine
|
||||
# time out to improve performance. This number should be from 4 to 30
|
||||
# which corresponds to (1 << n) cycles. For example, if you set to 26,
|
||||
# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz
|
||||
# system). Also you can control this number by THREAD_TIMEOUT
|
||||
# system). Also you can control this mumber by THREAD_TIMEOUT
|
||||
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26
|
||||
|
||||
# Using special device driver for mapping physically contiguous memory
|
||||
# Using special device driver for mapping physically contigous memory
|
||||
# to the user space. If bigphysarea is enabled, it will use it.
|
||||
# DEVICEDRIVER_ALLOCATION = 1
|
||||
|
||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only).
|
||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
||||
# CONSISTENT_FPCSR = 1
|
||||
|
||||
# If any gemm argument m, n or k is less or equal this threshold, gemm will be execute
|
||||
# with single thread. (Actually in recent versions this is a factor proportional to the
|
||||
# number of floating point operations necessary for the given problem size, no longer
|
||||
# an individual dimension). You can use this setting to avoid the overhead of multi-
|
||||
# threading in small matrix sizes. The default value is 4, but values as high as 50 have
|
||||
# been reported to be optimal for certain workloads (50 is the recommended value for Julia).
|
||||
# If any gemm arguement m, n or k is less or equal this threshold, gemm will be execute
|
||||
# with single thread. You can use this flag to avoid the overhead of multi-threading
|
||||
# in small matrix sizes. The default value is 4.
|
||||
# GEMM_MULTITHREAD_THRESHOLD = 4
|
||||
|
||||
# If you need sanity check by comparing results to reference BLAS. It'll be very
|
||||
# If you need santy check by comparing reference BLAS. It'll be very
|
||||
# slow (Not implemented yet).
|
||||
# SANITY_CHECK = 1
|
||||
|
||||
# Run testcases in utest/ . When you enable UTEST_CHECK, it would enable
|
||||
# SANITY_CHECK to compare the result with reference BLAS.
|
||||
# UTEST_CHECK = 1
|
||||
|
||||
# The installation directory.
|
||||
# PREFIX = /opt/OpenBLAS
|
||||
|
||||
# Common Optimization Flag;
|
||||
# The default -O2 is enough.
|
||||
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT
|
||||
# COMMON_OPT = -O2
|
||||
|
||||
# gfortran option for LAPACK to improve thread-safety
|
||||
# It is enabled by default in Makefile.system for gfortran
|
||||
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT
|
||||
# gfortran option for LAPACK
|
||||
# enable this flag only on 64bit Linux and if you need a thread safe lapack library
|
||||
# FCOMMON_OPT = -frecursive
|
||||
|
||||
# Profiling flags
|
||||
@@ -249,67 +162,19 @@ COMMON_PROF = -pg
|
||||
# Build Debug version
|
||||
# DEBUG = 1
|
||||
|
||||
# Set maximum stack allocation.
|
||||
# The default value is 2048. 0 disable stack allocation a may reduce GER and GEMV
|
||||
# performance. For details, https://github.com/xianyi/OpenBLAS/pull/482
|
||||
# Improve GEMV and GER for small matrices by stack allocation.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/482
|
||||
#
|
||||
# MAX_STACK_ALLOC = 0
|
||||
MAX_STACK_ALLOC=2048
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoid conflicts with other BLAS libraries, especially when using
|
||||
# 64 bit integer interfaces in OpenBLAS.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/459
|
||||
#
|
||||
# The same prefix and suffix are also added to the library name,
|
||||
# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas
|
||||
#
|
||||
# SYMBOLPREFIX=
|
||||
# SYMBOLSUFFIX=
|
||||
|
||||
# Run a C++ based thread safety tester after the build is done.
|
||||
# This is mostly intended as a developer feature to spot regressions, but users and
|
||||
# package maintainers can enable this if they have doubts about the thread safety of
|
||||
# the library, given the configuration in this file.
|
||||
# By default, the thread safety tester launches 52 concurrent calculations at the same
|
||||
# time.
|
||||
#
|
||||
# Please note that the test uses ~1300 MiB of RAM for the DGEMM test.
|
||||
#
|
||||
# The test requires CBLAS to be built, a C++11 capable compiler and the presence of
|
||||
# an OpenMP implementation. If you are cross-compiling this test will probably not
|
||||
# work at all.
|
||||
#
|
||||
# CPP_THREAD_SAFETY_TEST = 1
|
||||
#
|
||||
# use this to run only the less memory-hungry GEMV test
|
||||
# CPP_THREAD_SAFETY_GEMV = 1
|
||||
|
||||
|
||||
# If you want to enable the experimental BFLOAT16 support
|
||||
# BUILD_BFLOAT16 = 1
|
||||
|
||||
|
||||
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
|
||||
# will be allocated on the heap rather than the stack. (This array alone requires
|
||||
# NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu
|
||||
# counts, but obviously it is not the only item that ends up on the stack.
|
||||
# The default value of 32 ensures that the overall requirement is compatible
|
||||
# with the default 1MB stacksize imposed by having the Java VM loaded without use
|
||||
# of its -Xss parameter.
|
||||
# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible
|
||||
# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the java
|
||||
# VM e.g. in Octave or with the java-based libhdfs in numpy or scipy code
|
||||
# BLAS3_MEM_ALLOC_THRESHOLD = 160
|
||||
|
||||
|
||||
|
||||
# By default the library contains BLAS functions (and LAPACK if selected) for all input types.
|
||||
# To build a smaller library supporting e.g. only single precision real (SGEMM etc.) or only
|
||||
# the functions for complex numbers, uncomment the desired type(s) below
|
||||
# BUILD_SINGLE = 1
|
||||
# BUILD_DOUBLE = 1
|
||||
# BUILD_COMPLEX = 1
|
||||
# BUILD_COMPLEX16 = 1
|
||||
#
|
||||
# End of user configuration
|
||||
#
|
||||
|
||||
@@ -3,29 +3,21 @@ RANLIB = ranlib
|
||||
|
||||
ifdef BINARY64
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -mcpu=v9 -m64
|
||||
else
|
||||
CCOMMON_OPT += -m64
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), g77)
|
||||
FCOMMON_OPT += -mcpu=v9 -m64
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), f95)
|
||||
FCOMMON_OPT += -m64
|
||||
ifeq ($(COMPILER_F77), f90)
|
||||
FCOMMON_OPT += -xarch=v9
|
||||
endif
|
||||
else
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -mcpu=v9
|
||||
else
|
||||
CCOMMON_OPT += -xarch=v9
|
||||
endif
|
||||
|
||||
ifeq ($(COMPILER_F77), g77)
|
||||
FCOMMON_OPT += -mcpu=v9
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), f95)
|
||||
ifeq ($(COMPILER_F77), f90)
|
||||
FCOMMON_OPT += -xarch=v8plusb
|
||||
endif
|
||||
|
||||
@@ -45,4 +37,4 @@ LIBSUNPERF = -L/opt/SUNWspro/lib/v9 -L/opt/SUNWspro/prod/lib/v9 \
|
||||
else
|
||||
LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \
|
||||
-Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath
|
||||
endif
|
||||
endif
|
||||
902
Makefile.system
902
Makefile.system
File diff suppressed because it is too large
Load Diff
@@ -1,18 +1,17 @@
|
||||
SBBLASOBJS_P = $(SBBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
DBLASOBJS_P = $(DBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
SBEXTOBJS_P = $(SBEXTOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
IBLASOBJS_P = $(IBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
BLASOBJS = $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
|
||||
BLASOBJS_P = $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)
|
||||
BLASOBJS = $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
|
||||
BLASOBJS_P = $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P)
|
||||
|
||||
ifdef EXPRECISION
|
||||
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||
@@ -24,23 +23,26 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
||||
endif
|
||||
|
||||
$(SBBLASOBJS) $(SBBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
|
||||
ifdef INTEGER_PRECISION
|
||||
BLASOBJS += $(IBLASOBJS)
|
||||
BLASOBJS_P += $(IBLASOBJS_P)
|
||||
endif
|
||||
|
||||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
||||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
||||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
|
||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
||||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||
$(SBEXTOBJS) $(SBEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
|
||||
$(IBLASOBJS) $(IBLASOBJS_P) : override CFLAGS += -DINTEGER -UCOMPLEX
|
||||
|
||||
$(SBBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(SBEXTOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(IBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
|
||||
libs :: $(BLASOBJS) $(COMMONOBJS)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
|
||||
33
Makefile.x86
33
Makefile.x86
@@ -1,21 +1,5 @@
|
||||
# COMPILER_PREFIX = mingw32-
|
||||
|
||||
ifneq ($(DYNAMIC_ARCH),1)
|
||||
ADD_CPUFLAGS = 1
|
||||
else
|
||||
ifdef TARGET_CORE
|
||||
ADD_CPUFLAGS = 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef ADD_CPUFLAGS
|
||||
ifdef HAVE_SSE
|
||||
CCOMMON_OPT += -msse
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -msse
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
ARFLAGS = -m x86
|
||||
@@ -70,20 +54,3 @@ LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm
|
||||
else
|
||||
LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm
|
||||
endif
|
||||
ifdef HAVE_SSE2
|
||||
CCOMMON_OPT += -msse2
|
||||
FCOMMON_OPT += -msse2
|
||||
endif
|
||||
ifdef HAVE_SSE3
|
||||
CCOMMON_OPT += -msse3
|
||||
FCOMMON_OPT += -msse3
|
||||
ifdef HAVE_SSSE3
|
||||
CCOMMON_OPT += -mssse3
|
||||
FCOMMON_OPT += -mssse3
|
||||
endif
|
||||
ifdef HAVE_SSE4_1
|
||||
CCOMMON_OPT += -msse4.1
|
||||
FCOMMON_OPT += -msse4.1
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
205
Makefile.x86_64
205
Makefile.x86_64
@@ -8,211 +8,6 @@ endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifneq ($(DYNAMIC_ARCH),1)
|
||||
ADD_CPUFLAGS = 1
|
||||
else
|
||||
ifdef TARGET_CORE
|
||||
ADD_CPUFLAGS = 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef ADD_CPUFLAGS
|
||||
ifdef HAVE_SSE3
|
||||
CCOMMON_OPT += -msse3
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -msse3
|
||||
endif
|
||||
endif
|
||||
ifdef HAVE_SSSE3
|
||||
CCOMMON_OPT += -mssse3
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -mssse3
|
||||
endif
|
||||
endif
|
||||
ifdef HAVE_SSE4_1
|
||||
CCOMMON_OPT += -msse4.1
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -msse4.1
|
||||
endif
|
||||
endif
|
||||
ifndef OLDGCC
|
||||
ifdef HAVE_AVX
|
||||
CCOMMON_OPT += -mavx
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -mavx
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifndef NO_AVX2
|
||||
ifdef HAVE_AVX2
|
||||
CCOMMON_OPT += -mavx2
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -mavx2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), SKYLAKEX)
|
||||
ifndef NO_AVX512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), COOPERLAKE)
|
||||
ifndef NO_AVX512
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# cooperlake support was added in 10.1
|
||||
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
|
||||
CCOMMON_OPT += -march=cooperlake
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=cooperlake
|
||||
endif
|
||||
else # gcc not support, fallback to avx512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
endif
|
||||
endif
|
||||
else ifeq ($(C_COMPILER), CLANG)
|
||||
# cooperlake support was added in clang 9
|
||||
ifeq ($(CLANGVERSIONGTEQ9), 1)
|
||||
CCOMMON_OPT += -march=cooperlake
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=cooperlake
|
||||
endif
|
||||
else # not supported in clang, fallback to avx512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), SAPPHIRERAPIDS)
|
||||
ifndef NO_AVX512
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# sapphire rapids support was added in 11
|
||||
ifeq ($(GCCVERSIONGTEQ11), 1)
|
||||
CCOMMON_OPT += -march=sapphirerapids
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=sapphirerapids
|
||||
endif
|
||||
else # gcc not support, fallback to avx512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
endif
|
||||
endif
|
||||
else ifeq ($(C_COMPILER), CLANG)
|
||||
# sapphire rapids support was added in clang 12
|
||||
ifeq ($(CLANGVERSIONGTEQ12), 1)
|
||||
CCOMMON_OPT += -march=sapphirerapids
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=sapphirerapids
|
||||
endif
|
||||
else # not supported in clang, fallback to avx512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ZEN)
|
||||
ifdef HAVE_AVX512VL
|
||||
ifndef NO_AVX512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifdef HAVE_AVX2
|
||||
ifndef NO_AVX2
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# AVX2 support was added in 4.7.0
|
||||
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
|
||||
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||
CCOMMON_OPT += -mavx2
|
||||
endif
|
||||
else
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
CCOMMON_OPT += -mavx2
|
||||
endif
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
# AVX2 support was added in 4.7.0
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
|
||||
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||
FCOMMON_OPT += -mavx2
|
||||
endif
|
||||
else
|
||||
ifeq ($(F_COMPILER), FLANG)
|
||||
FCOMMON_OPT += -mavx2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
ARFLAGS = -m x64
|
||||
endif
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
|
||||
ifeq ($(CORE), Z13)
|
||||
CCOMMON_OPT += -march=z13 -mzvector
|
||||
FCOMMON_OPT += -march=z13 -mzvector
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), Z14)
|
||||
CCOMMON_OPT += -march=z14 -mzvector -O3
|
||||
FCOMMON_OPT += -march=z14 -mzvector
|
||||
endif
|
||||
|
||||
# Enable floating-point expression contraction for clang, since it is the
|
||||
# default for gcc
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
CCOMMON_OPT += -ffp-contract=on
|
||||
endif
|
||||
336
README.md
336
README.md
@@ -1,330 +1,138 @@
|
||||
# OpenBLAS
|
||||
|
||||
[](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
[](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
|
||||
Travis CI: [](https://travis-ci.com/xianyi/OpenBLAS)
|
||||
|
||||
AppVeyor: [](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
|
||||
|
||||
Cirrus CI: [](https://cirrus-ci.com/github/xianyi/OpenBLAS)
|
||||
<!-- Drone CI: [](https://cloud.drone.io/xianyi/OpenBLAS/)-->
|
||||
|
||||
|
||||
[](https://dev.azure.com/xianyi/OpenBLAS/_build/latest?definitionId=1&branchName=develop)
|
||||
|
||||
OSUOSL POWERCI [](http://powerci.osuosl.org/job/OpenBLAS_gh/job/develop/)
|
||||
|
||||
OSUOSL IBMZ-CI [](http://ibmz-ci.osuosl.org/job/OpenBLAS-Z/job/develop/)
|
||||
## Introduction
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
OpenBLAS is an optimized BLAS (Basic Linear Algebra Subprograms) library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
Please read the documentation on the OpenBLAS wiki pages: <https://github.com/xianyi/OpenBLAS/wiki>.
|
||||
|
||||
For a general introduction to the BLAS routines, please refer to the extensive documentation of their reference implementation hosted at netlib:
|
||||
<https://www.netlib.org/blas>. On that site you will likewise find documentation for the reference implementation of the higher-level library LAPACK - the **L**inear **A**lgebra **Pack**age that comes included with OpenBLAS. If you are looking for a general primer or refresher on Linear Algebra, the set of six
|
||||
20-minute lecture videos by Prof. Gilbert Strang on either MIT OpenCourseWare <https://ocw.mit.edu/resources/res-18-010-a-2020-vision-of-linear-algebra-spring-2020/> or Youtube <https://www.youtube.com/playlist?list=PLUl4u3cNGP61iQEFiWLE21EJCxwmWvvek> may be helpful.
|
||||
Please read the documents on OpenBLAS wiki pages <http://github.com/xianyi/OpenBLAS/wiki>.
|
||||
|
||||
## Binary Packages
|
||||
|
||||
We provide official binary packages for the following platform:
|
||||
We provide binary packages for the following platform.
|
||||
|
||||
* Windows x86/x86_64
|
||||
|
||||
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/) or from the Releases section of the github project page, [https://github.com/xianyi/OpenBLAS/releases](https://github.com/xianyi/OpenBLAS/releases).
|
||||
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/).
|
||||
|
||||
## Installation from Source
|
||||
Download from project homepage. http://xianyi.github.com/OpenBLAS/
|
||||
|
||||
Download from project homepage, https://xianyi.github.com/OpenBLAS/, or check out the code
|
||||
using Git from https://github.com/xianyi/OpenBLAS.git. (If you want the most up to date version, be
|
||||
sure to use the develop branch - master is several years out of date due to a change of maintainership.)
|
||||
Buildtime parameters can be chosen in Makefile.rule, see there for a short description of each option.
|
||||
Most can also be given directly on the make or cmake command line.
|
||||
|
||||
### Dependencies
|
||||
|
||||
Building OpenBLAS requires the following to be installed:
|
||||
|
||||
* GNU Make
|
||||
* A C compiler, e.g. GCC or Clang
|
||||
* A Fortran compiler (optional, for LAPACK)
|
||||
* IBM MASS (optional, see below)
|
||||
|
||||
Or, check out codes from git://github.com/xianyi/OpenBLAS.git
|
||||
### Normal compile
|
||||
|
||||
Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically.
|
||||
To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`.
|
||||
The full target list is in the file `TargetList.txt`, other build optionss are documented in Makefile.rule and
|
||||
can either be set there (typically by removing the comment character from the respective line), or used on the
|
||||
`make` command line.
|
||||
Note that when you run `make install` after building, you need to repeat all command line options you provided to `make`
|
||||
in the build step, as some settings like the supported maximum number of threads are automatically derived from the
|
||||
build host by default, which might not be what you want.
|
||||
For building with `cmake`, the usual conventions apply, i.e. create a build directory either underneath the toplevel
|
||||
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path to the source tree and any
|
||||
build options you plan to set.
|
||||
* type "make" to detect the CPU automatically.
|
||||
or
|
||||
* type "make TARGET=xxx" to set target CPU, e.g. "make TARGET=NEHALEM". The full target list is in file TargetList.txt.
|
||||
|
||||
### Cross compile
|
||||
|
||||
Set `CC` and `FC` to point to the cross toolchains, and set `HOSTCC` to your host C compiler.
|
||||
The target must be specified explicitly when cross compiling.
|
||||
Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly.
|
||||
|
||||
Examples:
|
||||
|
||||
* On an x86 box, compile this library for a loongson3a CPU:
|
||||
```sh
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
```
|
||||
or same with the newer mips-crosscompiler put out by Loongson that defaults to the 32bit ABI:
|
||||
```sh
|
||||
make HOSTCC=gcc CC='/opt/mips-loongson-gcc7.3-linux-gnu/2019.06-29/bin/mips-linux-gnu-gcc -mabi=64' FC='/opt/mips-loongson-gcc7.3-linux-gnu/2019.06-29/bin/mips-linux-gnu-gfortran -mabi=64' TARGET=LOONGSON3A
|
||||
```
|
||||
On X86 box, compile this library for loongson3a CPU.
|
||||
|
||||
* On an x86 box, compile this library for a loongson3a CPU with loongcc (based on Open64) compiler:
|
||||
```sh
|
||||
make CC=loongcc FC=loongf95 HOSTCC=gcc TARGET=LOONGSON3A CROSS=1 CROSS_SUFFIX=mips64el-st-linux-gnu- NO_LAPACKE=1 NO_SHARED=1 BINARY=32
|
||||
```
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
|
||||
On X86 box, compile this library for loongson3a CPU with loongcc (based on Open64) compiler.
|
||||
|
||||
make CC=loongcc FC=loongf95 HOSTCC=gcc TARGET=LOONGSON3A CROSS=1 CROSS_SUFFIX=mips64el-st-linux-gnu- NO_LAPACKE=1 NO_SHARED=1 BINARY=32
|
||||
|
||||
### Debug version
|
||||
|
||||
A debug version can be built using `make DEBUG=1`.
|
||||
make DEBUG=1
|
||||
|
||||
### Compile with MASS support on Power CPU (optional)
|
||||
### Install to the directory (optional)
|
||||
|
||||
The [IBM MASS](https://www.ibm.com/support/home/product/W511326D80541V01/other_software/mathematical_acceleration_subsystem) library consists of a set of mathematical functions for C, C++, and Fortran applications that are tuned for optimum performance on POWER architectures.
|
||||
OpenBLAS with MASS requires a 64-bit, little-endian OS on POWER.
|
||||
The library can be installed as shown:
|
||||
Example:
|
||||
|
||||
* On Ubuntu:
|
||||
```sh
|
||||
wget -q http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add -
|
||||
echo "deb http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ trusty main" | sudo tee /etc/apt/sources.list.d/ibm-xl-compiler-eval.list
|
||||
sudo apt-get update
|
||||
sudo apt-get install libxlmass-devel.8.1.5
|
||||
```
|
||||
make install PREFIX=your_installation_directory
|
||||
|
||||
* On RHEL/CentOS:
|
||||
```sh
|
||||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/repodata/repomd.xml.key
|
||||
sudo rpm --import repomd.xml.key
|
||||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/ibm-xl-compiler-eval.repo
|
||||
sudo cp ibm-xl-compiler-eval.repo /etc/yum.repos.d/
|
||||
sudo yum install libxlmass-devel.8.1.5
|
||||
```
|
||||
The default directory is /opt/OpenBLAS
|
||||
|
||||
After installing the MASS library, compile OpenBLAS with `USE_MASS=1`.
|
||||
For example, to compile on Power8 with MASS support: `make USE_MASS=1 TARGET=POWER8`.
|
||||
## Support CPU & OS
|
||||
Please read GotoBLAS_01Readme.txt
|
||||
|
||||
### Install to a specific directory (optional)
|
||||
|
||||
Use `PREFIX=` when invoking `make`, for example
|
||||
|
||||
```sh
|
||||
make install PREFIX=your_installation_directory
|
||||
```
|
||||
(along with all options you added on the `make` command line in the preceding build step)
|
||||
The default installation directory is `/opt/OpenBLAS`.
|
||||
|
||||
## Supported CPUs and Operating Systems
|
||||
|
||||
Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by the 2010 GotoBLAS.
|
||||
|
||||
### Additional supported CPUs
|
||||
|
||||
#### x86/x86-64
|
||||
### Additional support CPU:
|
||||
|
||||
#### x86/x86-64:
|
||||
- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes.
|
||||
- **Intel Sandy Bridge**: Optimized Level-3 and Level-2 BLAS with AVX on x86-64.
|
||||
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64.
|
||||
- **Intel Skylake-X**: Optimized Level-3 and Level-2 BLAS with AVX512 and FMA on x86-64.
|
||||
- **Intel Cooper Lake**: as Skylake-X with improved BFLOAT16 support.
|
||||
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64.
|
||||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
|
||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
|
||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thank Werner Saar)
|
||||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD ZEN**: Uses Haswell codes with some optimizations for Zen 2/3 (use SkylakeX for Zen4)
|
||||
|
||||
#### MIPS32
|
||||
|
||||
- **MIPS 1004K**: uses P5600 codes
|
||||
- **MIPS 24K**: uses P5600 codes
|
||||
|
||||
#### MIPS64
|
||||
|
||||
#### MIPS64:
|
||||
- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.
|
||||
- **ICT Loongson 3B**: Experimental
|
||||
|
||||
#### ARM
|
||||
#### ARM:
|
||||
- **ARMV6**: Optimized BLAS for vfpv2 and vfpv3-d16 ( e.g. BCM2835, Cortex M0+ )
|
||||
- **ARMV7**: Optimized BLAS for vfpv3-d32 ( e.g. Cortex A8, A9 and A15 )
|
||||
|
||||
- **ARMv6**: Optimized BLAS for vfpv2 and vfpv3-d16 (e.g. BCM2835, Cortex M0+)
|
||||
- **ARMv7**: Optimized BLAS for vfpv3-d32 (e.g. Cortex A8, A9 and A15)
|
||||
|
||||
#### ARM64
|
||||
|
||||
- **ARMv8**: Basic ARMV8 with small caches, optimized Level-3 and Level-2 BLAS
|
||||
- **Cortex-A53**: same as ARMV8 (different cpu specifications)
|
||||
- **Cortex-A55**: same as ARMV8 (different cpu specifications)
|
||||
- **Cortex A57**: Optimized Level-3 and Level-2 functions
|
||||
- **Cortex A72**: same as A57 ( different cpu specifications)
|
||||
- **Cortex A73**: same as A57 (different cpu specifications)
|
||||
- **Cortex A76**: same as A57 (different cpu specifications)
|
||||
- **Falkor**: same as A57 (different cpu specifications)
|
||||
- **ThunderX**: Optimized some Level-1 functions
|
||||
- **ThunderX2T99**: Optimized Level-3 BLAS and parts of Levels 1 and 2
|
||||
- **ThunderX3T110**
|
||||
- **TSV110**: Optimized some Level-3 helper functions
|
||||
- **EMAG 8180**: preliminary support based on A57
|
||||
- **Neoverse N1**: (AWS Graviton2) preliminary support
|
||||
- **Neoverse V1**: (AWS Graviton3) optimized Level-3 BLAS
|
||||
- **Apple Vortex**: preliminary support based on ThunderX2/3
|
||||
- **A64FX**: preliminary support, optimized Level-3 BLAS
|
||||
- **ARMV8SVE**: any ARMV8 cpu with SVE extensions
|
||||
|
||||
#### PPC/PPC64
|
||||
|
||||
- **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1`
|
||||
- **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only.
|
||||
- **POWER10**: Optimized Level-3 BLAS including SBGEMM and some Level-1,2.
|
||||
|
||||
- **AIX**: Dynamic architecture with OpenXL and OpenMP.
|
||||
```sh
|
||||
make CC=ibm-clang_r FC=xlf TARGET=POWER7 BINARY=64 USE_OPENMP=1 INTERFACE64=1 DYNAMIC_ARCH=1 USE_THREAD=1
|
||||
```
|
||||
|
||||
#### IBM zEnterprise System
|
||||
|
||||
- **Z13**: Optimized Level-3 BLAS and Level-1,2
|
||||
- **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2
|
||||
|
||||
#### RISC-V
|
||||
|
||||
- **C910V**: Optimized Level-3 BLAS (real) and Level-1,2 by RISC-V Vector extension 0.7.1.
|
||||
```sh
|
||||
make HOSTCC=gcc TARGET=C910V CC=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran
|
||||
```
|
||||
(also known to work on C906 as long as you use only single-precision functions - its instruction set support appears to be incomplete in double precision)
|
||||
|
||||
- **x280**: Level-3 BLAS and Level-1,2 are optimized by RISC-V Vector extension 1.0.
|
||||
```sh
|
||||
make HOSTCC=gcc TARGET=x280 NUM_THREADS=8 CC=riscv64-unknown-linux-gnu-clang FC=riscv64-unknown-linux-gnu-gfortran
|
||||
```
|
||||
|
||||
- **ZVL???B**: Level-3 BLAS and Level-1,2 including vectorised kernels targeting generic RISCV cores with vector support with registers of at least the corresponding width; ZVL128B and ZVL256B are available.
|
||||
e.g.:
|
||||
```sh
|
||||
make TARGET=RISCV64_ZVL256B CFLAGS="-DTARGET=RISCV64_ZVL256B" \
|
||||
BINARY=64 ARCH=riscv64 CC='clang -target riscv64-unknown-linux-gnu' \
|
||||
AR=riscv64-unknown-linux-gnu-ar AS=riscv64-unknown-linux-gnu-gcc \
|
||||
LD=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran \
|
||||
HOSTCC=gcc HOSTFC=gfortran -j
|
||||
```
|
||||
|
||||
### Support for multiple targets in a single library
|
||||
|
||||
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.
|
||||
|
||||
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX, Cooper Lake, Sapphire Rapids. For cpu generations not included in this list, the corresponding older model is used. If you also specify `DYNAMIC_OLDER=1`, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option `DYNAMIC_LIST` that allows to specify an individual list of targets to include instead of the default.
|
||||
|
||||
`DYNAMIC_ARCH` is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias,
|
||||
Core2, Penryn, Dunnington, Nehalem, Athlon, Opteron, Opteron_SSE3, Barcelona, Bobcat, Atom and Nano.
|
||||
|
||||
On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus. If compiler support for SVE is available at build time, support for NeoverseN2, NeoverseV1 as well as generic ArmV8SVE targets is also enabled.
|
||||
|
||||
For **POWER**, the list encompasses POWER6, POWER8 and POWER9. POWER10 is additionally available if a sufficiently recent compiler is used for the build.
|
||||
|
||||
on **ZARCH** it comprises Z13 and Z14 as well as generic zarch support.
|
||||
|
||||
The `TARGET` option can be used in conjunction with `DYNAMIC_ARCH=1` to specify which cpu model should be assumed for all the
|
||||
common code in the library, usually you will want to set this to the oldest model you expect to encounter.
|
||||
Please note that it is not possible to combine support for different architectures, so no combined 32 and 64 bit or x86_64 and arm64 in the same library.
|
||||
|
||||
### Supported OS
|
||||
#### ARM64:
|
||||
- **ARMV8**: Experimental
|
||||
|
||||
### Support OS:
|
||||
- **GNU/Linux**
|
||||
- **MinGW or Visual Studio (CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||
- **Darwin/macOS/OSX/iOS**: Experimental. Although GotoBLAS2 already supports Darwin, we are not OSX/iOS experts.
|
||||
- **FreeBSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **OpenBSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **NetBSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **DragonFly BSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **Android**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
|
||||
- **AIX**: Supported on PPC up to POWER10
|
||||
- **Haiku**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **SunOS**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **Cortex-M**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-on-Cortex-M>.
|
||||
- **MingWin/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X.
|
||||
- **FreeBSD**: Supported by community. We didn't test the library on this OS.
|
||||
|
||||
## Usage
|
||||
## Usages
|
||||
Link with libopenblas.a or -lopenblas for shared library.
|
||||
|
||||
Statically link with `libopenblas.a` or dynamically link with `-lopenblas` if OpenBLAS was
|
||||
compiled as a shared library.
|
||||
### Set the number of threads with environment variables.
|
||||
|
||||
### Setting the number of threads using environment variables
|
||||
Examples:
|
||||
|
||||
Environment variables are used to specify a maximum number of threads.
|
||||
For example,
|
||||
export OPENBLAS_NUM_THREADS=4
|
||||
|
||||
```sh
|
||||
export OPENBLAS_NUM_THREADS=4
|
||||
export GOTO_NUM_THREADS=4
|
||||
export OMP_NUM_THREADS=4
|
||||
```
|
||||
or
|
||||
|
||||
The priorities are `OPENBLAS_NUM_THREADS` > `GOTO_NUM_THREADS` > `OMP_NUM_THREADS`.
|
||||
export GOTO_NUM_THREADS=4
|
||||
|
||||
If you compile this library with `USE_OPENMP=1`, you should set the `OMP_NUM_THREADS`
|
||||
environment variable; OpenBLAS ignores `OPENBLAS_NUM_THREADS` and `GOTO_NUM_THREADS` when
|
||||
compiled with `USE_OPENMP=1`.
|
||||
or
|
||||
|
||||
### Setting the number of threads at runtime
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
We provide the following functions to control the number of threads at runtime:
|
||||
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
|
||||
```c
|
||||
void goto_set_num_threads(int num_threads);
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
```
|
||||
Note that these are only used once at library initialization, and are not available for
|
||||
fine-tuning thread numbers in individual BLAS calls.
|
||||
If you compile this library with `USE_OPENMP=1`, you should use the above functions too.
|
||||
If you compile this lib with USE_OPENMP=1, you should set OMP_NUM_THREADS environment variable. OpenBLAS ignores OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS with USE_OPENMP=1.
|
||||
|
||||
## Reporting bugs
|
||||
### Set the number of threads on runtime.
|
||||
|
||||
Please submit an issue in https://github.com/xianyi/OpenBLAS/issues.
|
||||
We provided the below functions to control the number of threads on runtime.
|
||||
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should use the above functions, too.
|
||||
|
||||
## Report Bugs
|
||||
Please add a issue in https://github.com/xianyi/OpenBLAS/issues
|
||||
|
||||
## Contact
|
||||
|
||||
* OpenBLAS users mailing list: https://groups.google.com/forum/#!forum/openblas-users
|
||||
* OpenBLAS developers mailing list: https://groups.google.com/forum/#!forum/openblas-dev
|
||||
|
||||
## Change log
|
||||
|
||||
Please see Changelog.txt to view the differences between OpenBLAS and GotoBLAS2 1.13 BSD version.
|
||||
## ChangeLog
|
||||
Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
* Please read the [FAQ](https://github.com/xianyi/OpenBLAS/wiki/Faq) first.
|
||||
* Please use GCC version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD.
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture.
|
||||
Clang 3.0 will generate the wrong AVX binary code.
|
||||
* Please use GCC version 6 or LLVM version 6 and above to compile Skylake AVX512 kernels.
|
||||
* The number of CPUs/cores should be less than or equal to 256. On Linux `x86_64` (`amd64`),
|
||||
there is experimental support for up to 1024 CPUs/cores and 128 numa nodes if you build
|
||||
the library with `BIGNUMA=1`.
|
||||
* OpenBLAS does not set processor affinity by default.
|
||||
On Linux, you can enable processor affinity by commenting out the line `NO_AFFINITY=1` in
|
||||
Makefile.rule. However, note that this may cause
|
||||
[a conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
|
||||
* On Loongson 3A, `make test` may fail with a `pthread_create` error (`EAGAIN`).
|
||||
However, it will be okay when you run the same test case on the shell.
|
||||
* Please read [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) at first.
|
||||
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
|
||||
* The number of CPUs/Cores should less than or equal to 256. On Linux x86_64(amd64), there is experimental support for up to 1024 CPUs/Cores and 128 numa nodes if you build the library with BIGNUMA=1.
|
||||
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting the line NO_AFFINITY=1 in Makefile.rule. But this may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
|
||||
* On Loongson 3A. make test would be failed because of pthread_create error. The error code is EAGAIN. However, it will be OK when you run the same testcase on shell.
|
||||
|
||||
## Contributing
|
||||
|
||||
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue
|
||||
to start a discussion around a feature idea or a bug.
|
||||
2. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
|
||||
3. Write a test which shows that the bug was fixed or that the feature works as expected.
|
||||
4. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.
|
||||
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue to start a discussion around a feature idea or a bug.
|
||||
1. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
|
||||
1. Write a test which shows that the bug was fixed or that the feature works as expected.
|
||||
1. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.
|
||||
|
||||
## Donation
|
||||
|
||||
Please read [this wiki page](https://github.com/xianyi/OpenBLAS/wiki/Donation).
|
||||
|
||||
20
SECURITY.md
20
SECURITY.md
@@ -1,20 +0,0 @@
|
||||
# Security Policy
|
||||
|
||||
## Supported Versions
|
||||
|
||||
It is generally recommended to use the latest release as this project
|
||||
does not maintain multiple stable branches and providing packages e.g.
|
||||
for Linux distributions is outside our scope. In particular, versions
|
||||
before 0.3.18 can be assumed to carry the out-of-bounds-read error in
|
||||
the LAPACK ?LARRV family of functions that was the subject of
|
||||
CVE-2021-4048
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
If you suspect that you have found a vulnerability - a defect that could
|
||||
be abused to compromise the security of a user's code or systems - please
|
||||
do not use the normal github issue tracker (except perhaps to post a general
|
||||
warning if you deem that necessary). Instead, please contact the project
|
||||
maintainers through the email addresses given in their github user profiles.
|
||||
Defects found in the "lapack-netlib" subtree should ideally be reported to
|
||||
the maintainers of the reference implementation of LAPACK, lapack@icl.itk.edu
|
||||
@@ -20,10 +20,7 @@ DUNNINGTON
|
||||
NEHALEM
|
||||
SANDYBRIDGE
|
||||
HASWELL
|
||||
SKYLAKEX
|
||||
ATOM
|
||||
COOPERLAKE
|
||||
SAPPHIRERAPIDS
|
||||
|
||||
b)AMD CPU:
|
||||
ATHLON
|
||||
@@ -37,7 +34,6 @@ BULLDOZER
|
||||
PILEDRIVER
|
||||
STEAMROLLER
|
||||
EXCAVATOR
|
||||
ZEN
|
||||
|
||||
c)VIA CPU:
|
||||
SSE_GENERIC
|
||||
@@ -48,10 +44,6 @@ NANO
|
||||
POWER4
|
||||
POWER5
|
||||
POWER6
|
||||
POWER7
|
||||
POWER8
|
||||
POWER9
|
||||
POWER10
|
||||
PPCG4
|
||||
PPC970
|
||||
PPC970MP
|
||||
@@ -59,85 +51,24 @@ PPC440
|
||||
PPC440FP2
|
||||
CELL
|
||||
|
||||
3.MIPS CPU:
|
||||
P5600
|
||||
MIPS1004K
|
||||
MIPS24K
|
||||
|
||||
4.MIPS64 CPU:
|
||||
MIPS64_GENERIC
|
||||
3.MIPS64 CPU:
|
||||
SICORTEX
|
||||
LOONGSON3A
|
||||
LOONGSON3B
|
||||
I6400
|
||||
P6600
|
||||
I6500
|
||||
|
||||
5.IA64 CPU:
|
||||
4.IA64 CPU:
|
||||
ITANIUM2
|
||||
|
||||
6.SPARC CPU:
|
||||
5.SPARC CPU:
|
||||
SPARC
|
||||
SPARCV7
|
||||
|
||||
7.ARM CPU:
|
||||
6.ARM CPU:
|
||||
CORTEXA15
|
||||
CORTEXA9
|
||||
ARMV7
|
||||
ARMV6
|
||||
ARMV5
|
||||
|
||||
8.ARM 64-bit CPU:
|
||||
7.ARM 64-bit CPU:
|
||||
ARMV8
|
||||
CORTEXA53
|
||||
CORTEXA57
|
||||
CORTEXA72
|
||||
CORTEXA73
|
||||
CORTEXA76
|
||||
CORTEXA510
|
||||
CORTEXA710
|
||||
CORTEXX1
|
||||
CORTEXX2
|
||||
NEOVERSEN1
|
||||
NEOVERSEV1
|
||||
NEOVERSEN2
|
||||
CORTEXA55
|
||||
EMAG8180
|
||||
FALKOR
|
||||
THUNDERX
|
||||
THUNDERX2T99
|
||||
TSV110
|
||||
THUNDERX3T110
|
||||
VORTEX
|
||||
A64FX
|
||||
ARMV8SVE
|
||||
FT2000
|
||||
|
||||
9.System Z:
|
||||
ZARCH_GENERIC
|
||||
Z13
|
||||
Z14
|
||||
|
||||
10.RISC-V 64:
|
||||
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
|
||||
RISCV64_ZVL128B
|
||||
C910V
|
||||
x280
|
||||
RISCV64_ZVL256B
|
||||
|
||||
11.LOONGARCH64:
|
||||
LOONGSONGENERIC
|
||||
LOONGSON3R5
|
||||
LOONGSON2K1000
|
||||
|
||||
12. Elbrus E2000:
|
||||
E2K
|
||||
|
||||
13. Alpha
|
||||
EV4
|
||||
EV5
|
||||
EV6
|
||||
|
||||
14.CSKY
|
||||
CSKY
|
||||
CK860FV
|
||||
|
||||
213
USAGE.md
213
USAGE.md
@@ -1,213 +0,0 @@
|
||||
# Notes on OpenBLAS usage
|
||||
## Usage
|
||||
|
||||
#### Program is Terminated. Because you tried to allocate too many memory regions
|
||||
|
||||
In OpenBLAS, we mange a pool of memory buffers and allocate the number of
|
||||
buffers as the following.
|
||||
```
|
||||
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
|
||||
```
|
||||
This error indicates that the program exceeded the number of buffers.
|
||||
|
||||
Please build OpenBLAS with larger `NUM_THREADS`. For example, `make
|
||||
NUM_THREADS=32` or `make NUM_THREADS=64`. In `Makefile.system`, we will set
|
||||
`MAX_CPU_NUMBER=NUM_THREADS`.
|
||||
|
||||
Despite its name, and due to the use of memory buffers in functions like SGEMM,
|
||||
the setting of NUM_THREADS can be relevant even for a single-threaded build
|
||||
of OpenBLAS, if such functions get called by multiple threads of a program
|
||||
that uses OpenBLAS. In some cases, the affected code may simply crash or throw
|
||||
a segmentation fault without displaying the above warning first.
|
||||
|
||||
Note that the number of threads used at runtime can be altered to differ from the
|
||||
value NUM_THREADS was set to at build time. At runtime, the actual number of
|
||||
threads can be set anywhere from 1 to the build's NUM_THREADS (note however,
|
||||
that this does not change the number of memory buffers that will be allocated,
|
||||
which is set at build time). The number of threads for a process can be set by
|
||||
using the mechanisms described below.
|
||||
|
||||
|
||||
#### How can I use OpenBLAS in multi-threaded applications?
|
||||
|
||||
If your application is already multi-threaded, it will conflict with OpenBLAS
|
||||
multi-threading. Thus, you must set OpenBLAS to use single thread in any of the
|
||||
following ways:
|
||||
|
||||
* `export OPENBLAS_NUM_THREADS=1` in the environment variables.
|
||||
* Call `openblas_set_num_threads(1)` in the application on runtime.
|
||||
* Build OpenBLAS single thread version, e.g. `make USE_THREAD=0`
|
||||
|
||||
If the application is parallelized by OpenMP, please use OpenBLAS built with
|
||||
`USE_OPENMP=1`
|
||||
|
||||
#### How to choose TARGET manually at runtime when compiled with DYNAMIC_ARCH
|
||||
|
||||
The environment variable which control the kernel selection is
|
||||
`OPENBLAS_CORETYPE` (see `driver/others/dynamic.c`) e.g. `export
|
||||
OPENBLAS_CORETYPE=Haswell` and the function `char* openblas_get_corename()`
|
||||
returns the used target.
|
||||
|
||||
#### How could I disable OpenBLAS threading affinity on runtime?
|
||||
|
||||
You can define the `OPENBLAS_MAIN_FREE` or `GOTOBLAS_MAIN_FREE` environment
|
||||
variable to disable threading affinity on runtime. For example, before the
|
||||
running,
|
||||
```
|
||||
export OPENBLAS_MAIN_FREE=1
|
||||
```
|
||||
|
||||
Alternatively, you can disable affinity feature with enabling `NO_AFFINITY=1`
|
||||
in `Makefile.rule`.
|
||||
|
||||
## Linking with the library
|
||||
|
||||
* Link with shared library
|
||||
|
||||
`gcc -o test test.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas`
|
||||
|
||||
If the library is multithreaded, please add `-lpthread`. If the library
|
||||
contains LAPACK functions, please add `-lgfortran` or other Fortran libs.
|
||||
|
||||
* Link with static library
|
||||
|
||||
`gcc -o test test.c /your/path/libopenblas.a`
|
||||
|
||||
You can download `test.c` from https://gist.github.com/xianyi/5780018
|
||||
|
||||
On Linux, if OpenBLAS was compiled with threading support (`USE_THREAD=1` by
|
||||
default), custom programs statically linked against `libopenblas.a` should also
|
||||
link with the pthread library e.g.:
|
||||
|
||||
```
|
||||
gcc -static -I/opt/OpenBLAS/include -L/opt/OpenBLAS/lib -o my_program my_program.c -lopenblas -lpthread
|
||||
```
|
||||
|
||||
Failing to add the `-lpthread` flag will cause errors such as:
|
||||
|
||||
```
|
||||
/opt/OpenBLAS/libopenblas.a(memory.o): In function `_touch_memory':
|
||||
memory.c:(.text+0x15): undefined reference to `pthread_mutex_lock'
|
||||
memory.c:(.text+0x41): undefined reference to `pthread_mutex_unlock'
|
||||
...
|
||||
```
|
||||
|
||||
## Code examples
|
||||
|
||||
#### Call CBLAS interface
|
||||
This example shows calling cblas_dgemm in C. https://gist.github.com/xianyi/6930656
|
||||
```
|
||||
#include <cblas.h>
|
||||
#include <stdio.h>
|
||||
|
||||
void main()
|
||||
{
|
||||
int i=0;
|
||||
double A[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
|
||||
double B[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
|
||||
double C[9] = {.5,.5,.5,.5,.5,.5,.5,.5,.5};
|
||||
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,3,3,2,1,A, 3, B, 3,2,C,3);
|
||||
|
||||
for(i=0; i<9; i++)
|
||||
printf("%lf ", C[i]);
|
||||
printf("\n");
|
||||
}
|
||||
```
|
||||
`gcc -o test_cblas_open test_cblas_dgemm.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas -lpthread -lgfortran`
|
||||
|
||||
#### Call BLAS Fortran interface
|
||||
|
||||
This example shows calling dgemm Fortran interface in C. https://gist.github.com/xianyi/5780018
|
||||
|
||||
```
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "sys/time.h"
|
||||
#include "time.h"
|
||||
|
||||
extern void dgemm_(char*, char*, int*, int*,int*, double*, double*, int*, double*, int*, double*, double*, int*);
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int i;
|
||||
printf("test!\n");
|
||||
if(argc<4){
|
||||
printf("Input Error\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
int m = atoi(argv[1]);
|
||||
int n = atoi(argv[2]);
|
||||
int k = atoi(argv[3]);
|
||||
int sizeofa = m * k;
|
||||
int sizeofb = k * n;
|
||||
int sizeofc = m * n;
|
||||
char ta = 'N';
|
||||
char tb = 'N';
|
||||
double alpha = 1.2;
|
||||
double beta = 0.001;
|
||||
|
||||
struct timeval start,finish;
|
||||
double duration;
|
||||
|
||||
double* A = (double*)malloc(sizeof(double) * sizeofa);
|
||||
double* B = (double*)malloc(sizeof(double) * sizeofb);
|
||||
double* C = (double*)malloc(sizeof(double) * sizeofc);
|
||||
|
||||
srand((unsigned)time(NULL));
|
||||
|
||||
for (i=0; i<sizeofa; i++)
|
||||
A[i] = i%3+1;//(rand()%100)/10.0;
|
||||
|
||||
for (i=0; i<sizeofb; i++)
|
||||
B[i] = i%3+1;//(rand()%100)/10.0;
|
||||
|
||||
for (i=0; i<sizeofc; i++)
|
||||
C[i] = i%3+1;//(rand()%100)/10.0;
|
||||
//#if 0
|
||||
printf("m=%d,n=%d,k=%d,alpha=%lf,beta=%lf,sizeofc=%d\n",m,n,k,alpha,beta,sizeofc);
|
||||
gettimeofday(&start, NULL);
|
||||
dgemm_(&ta, &tb, &m, &n, &k, &alpha, A, &m, B, &k, &beta, C, &m);
|
||||
gettimeofday(&finish, NULL);
|
||||
|
||||
duration = ((double)(finish.tv_sec-start.tv_sec)*1000000 + (double)(finish.tv_usec-start.tv_usec)) / 1000000;
|
||||
double gflops = 2.0 * m *n*k;
|
||||
gflops = gflops/duration*1.0e-6;
|
||||
|
||||
FILE *fp;
|
||||
fp = fopen("timeDGEMM.txt", "a");
|
||||
fprintf(fp, "%dx%dx%d\t%lf s\t%lf MFLOPS\n", m, n, k, duration, gflops);
|
||||
fclose(fp);
|
||||
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
` gcc -o time_dgemm time_dgemm.c /your/path/libopenblas.a`
|
||||
|
||||
` ./time_dgemm <m> <n> <k> `
|
||||
|
||||
## Troubleshooting
|
||||
* Please read [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) at first.
|
||||
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
|
||||
* The number of CPUs/Cores should less than or equal to 256. On Linux x86_64(amd64), there is experimental support for up to 1024 CPUs/Cores and 128 numa nodes if you build the library with BIGNUMA=1.
|
||||
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting the line NO_AFFINITY=1 in Makefile.rule. But this may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
|
||||
* On Loongson 3A. make test would be failed because of pthread_create error. The error code is EAGAIN. However, it will be OK when you run the same testcase on shell.
|
||||
|
||||
## BLAS reference manual
|
||||
If you want to understand every BLAS function and definition, please read
|
||||
[Intel MKL reference manual](https://software.intel.com/sites/products/documentation/doclib/iss/2013/mkl/mklman/GUID-F7ED9FB8-6663-4F44-A62B-61B63C4F0491.htm)
|
||||
or [netlib.org](http://netlib.org/blas/)
|
||||
|
||||
Here are [OpenBLAS extension functions](https://github.com/xianyi/OpenBLAS/wiki/OpenBLAS-Extensions)
|
||||
|
||||
## How to reference OpenBLAS.
|
||||
|
||||
You can reference our [papers](https://github.com/xianyi/OpenBLAS/wiki/publications).
|
||||
|
||||
Alternatively, you can cite the OpenBLAS homepage http://www.openblas.net directly.
|
||||
|
||||
78
appveyor.yml
78
appveyor.yml
@@ -1,78 +0,0 @@
|
||||
version: 0.2.19.{build}
|
||||
|
||||
#environment:
|
||||
|
||||
platform:
|
||||
- x64
|
||||
|
||||
os: Visual Studio 2017
|
||||
|
||||
configuration: Release
|
||||
|
||||
clone_folder: c:\projects\OpenBLAS
|
||||
|
||||
init:
|
||||
- git config --global core.autocrlf input
|
||||
|
||||
clone_depth: 5
|
||||
|
||||
skip_tags: true
|
||||
|
||||
matrix:
|
||||
fast_finish: false
|
||||
|
||||
skip_commits:
|
||||
# Add [av skip] to commit messages
|
||||
message: /\[av skip\]/
|
||||
|
||||
environment:
|
||||
global:
|
||||
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
|
||||
matrix:
|
||||
# - COMPILER: clang-cl
|
||||
# WITH_FORTRAN: ON
|
||||
# - COMPILER: clang-cl
|
||||
# DYNAMIC_ARCH: ON
|
||||
# WITH_FORTRAN: OFF
|
||||
# - COMPILER: cl
|
||||
# - COMPILER: MinGW64-gcc-7.2.0-mingw
|
||||
# DYNAMIC_ARCH: OFF
|
||||
# WITH_FORTRAN: ignore
|
||||
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
|
||||
COMPILER: MinGW-gcc-6.3.0-32
|
||||
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
|
||||
COMPILER: MinGW-gcc-5.3.0
|
||||
WITH_FORTRAN: ignore
|
||||
|
||||
install:
|
||||
- if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
|
||||
- if [%COMPILER%]==[clang-cl] conda update --yes -n base conda
|
||||
- if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
|
||||
- if [%COMPILER%]==[clang-cl] conda config --set auto_update_conda false
|
||||
- if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake ninja flang=11.0.1
|
||||
- if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
|
||||
- if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
|
||||
- if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
|
||||
|
||||
before_build:
|
||||
- ps: if (-Not (Test-Path .\build)) { mkdir build }
|
||||
- cd build
|
||||
- set PATH=%PATH:C:\Program Files\Git\usr\bin;=%
|
||||
- if [%COMPILER%]==[MinGW-gcc-5.3.0] set PATH=C:\MinGW\bin;C:\msys64\usr\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
|
||||
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] set PATH=C:\MinGW\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
|
||||
- if [%COMPILER%]==[MinGW-gcc-6.3.0-32] set PATH=C:\msys64\usr\bin;C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw64\bin;%PATH%
|
||||
- if [%COMPILER%]==[cl] cmake -G "Visual Studio 15 2017 Win64" ..
|
||||
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 ..
|
||||
- if [%COMPILER%]==[MinGW-gcc-6.3.0-32] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
|
||||
- if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
|
||||
- if [%WITH_FORTRAN%]==[OFF] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_MT=mt -DMSVC_STATIC_CRT=ON ..
|
||||
- if [%WITH_FORTRAN%]==[ON] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DCMAKE_MT=mt -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
|
||||
- if [%USE_OPENMP%]==[ON] cmake -DUSE_OPENMP=ON ..
|
||||
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' ..
|
||||
|
||||
build_script:
|
||||
- cmake --build .
|
||||
|
||||
test_script:
|
||||
- ctest -j2
|
||||
|
||||
@@ -1,304 +0,0 @@
|
||||
trigger:
|
||||
# start a new build for every push
|
||||
batch: False
|
||||
branches:
|
||||
include:
|
||||
- develop
|
||||
resources:
|
||||
containers:
|
||||
- container: oneapi-hpckit
|
||||
image: intel/oneapi-hpckit:latest
|
||||
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
|
||||
- container: oneapi-basekit
|
||||
image: intel/oneapi-basekit:latest
|
||||
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
|
||||
|
||||
jobs:
|
||||
# manylinux1 is useful to test because the
|
||||
# standard Docker container uses an old version
|
||||
# of gcc / glibc
|
||||
- job: manylinux1_gcc
|
||||
pool:
|
||||
vmImage: 'ubuntu-latest'
|
||||
steps:
|
||||
- script: |
|
||||
echo "FROM quay.io/pypa/manylinux1_x86_64
|
||||
COPY . /tmp/openblas
|
||||
RUN cd /tmp/openblas && \
|
||||
COMMON_FLAGS='DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32' && \
|
||||
BTYPE='BINARY=64' CC=gcc && \
|
||||
make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE && \
|
||||
make -C test $COMMON_FLAGS $BTYPE && \
|
||||
make -C ctest $COMMON_FLAGS $BTYPE && \
|
||||
make -C utest $COMMON_FLAGS $BTYPE" > Dockerfile
|
||||
docker build .
|
||||
displayName: Run manylinux1 docker build
|
||||
- job: Intel_SDE_skx
|
||||
pool:
|
||||
vmImage: 'ubuntu-latest'
|
||||
steps:
|
||||
- script: |
|
||||
# at the time of writing the available Azure Ubuntu vm image
|
||||
# does not support AVX512VL, so use more recent LTS version
|
||||
echo "FROM ubuntu:bionic
|
||||
COPY . /tmp/openblas
|
||||
RUN apt-get -y update && apt-get -y install \\
|
||||
cmake \\
|
||||
gfortran \\
|
||||
make \\
|
||||
wget
|
||||
RUN mkdir /tmp/SDE && cd /tmp/SDE && \\
|
||||
mkdir sde-external-8.35.0-2019-03-11-lin && \\
|
||||
wget --quiet -O sde-external-8.35.0-2019-03-11-lin.tar.bz2 https://www.dropbox.com/s/fopsnzj67572sj5/sde-external-8.35.0-2019-03-11-lin.tar.bz2?dl=0 && \\
|
||||
tar -xjvf sde-external-8.35.0-2019-03-11-lin.tar.bz2 -C /tmp/SDE/sde-external-8.35.0-2019-03-11-lin --strip-components=1
|
||||
RUN cd /tmp/openblas && CC=gcc make QUIET_MAKE=1 DYNAMIC_ARCH=1 NUM_THREADS=32 BINARY=64
|
||||
CMD cd /tmp/openblas && echo 0 > /proc/sys/kernel/yama/ptrace_scope && CC=gcc OPENBLAS_VERBOSE=2 /tmp/SDE/sde-external-8.35.0-2019-03-11-lin/sde64 -cpuid_in /tmp/SDE/sde-external-8.35.0-2019-03-11-lin/misc/cpuid/skx/cpuid.def -- make -C utest DYNAMIC_ARCH=1 NUM_THREADS=32 BINARY=64" > Dockerfile
|
||||
docker build -t intel_sde .
|
||||
# we need a privileged docker run for sde process attachment
|
||||
docker run --privileged intel_sde
|
||||
displayName: 'Run AVX512 SkylakeX docker build / test'
|
||||
|
||||
- job: Windows_cl
|
||||
pool:
|
||||
vmImage: 'windows-latest'
|
||||
steps:
|
||||
- task: CMake@1
|
||||
inputs:
|
||||
workingDirectory: 'build' # Optional
|
||||
cmakeArgs: '-G "Visual Studio 17 2022" ..'
|
||||
- task: CMake@1
|
||||
inputs:
|
||||
cmakeArgs: '--build . --config Release'
|
||||
workingDirectory: 'build'
|
||||
- script: |
|
||||
cd build
|
||||
cd utest
|
||||
dir
|
||||
openblas_utest.exe
|
||||
|
||||
- job: Windows_mingw_gmake
|
||||
pool:
|
||||
vmImage: 'windows-latest'
|
||||
steps:
|
||||
- script: |
|
||||
mingw32-make CC=gcc FC=gfortran DYNAMIC_ARCH=1 DYNAMIC_LIST="SANDYBRIDGE"
|
||||
|
||||
- job: Windows_clang_cmake
|
||||
pool:
|
||||
vmImage: 'windows-latest'
|
||||
steps:
|
||||
- script: |
|
||||
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
|
||||
set "LIB=C:\Miniconda\Library\lib;%LIB%"
|
||||
set "CPATH=C:\Miniconda\Library\include;%CPATH%
|
||||
conda config --add channels conda-forge --force
|
||||
conda config --set auto_update_conda false
|
||||
conda install --yes ninja
|
||||
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DNOFORTRAN=1 -DMSVC_STATIC_CRT=ON ..
|
||||
cmake --build . --config Release
|
||||
ctest
|
||||
|
||||
- job: Windows_flang_clang
|
||||
pool:
|
||||
vmImage: 'windows-2022'
|
||||
steps:
|
||||
- script: |
|
||||
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
|
||||
set "LIB=C:\Miniconda\Library\lib;%LIB%"
|
||||
set "CPATH=C:\Miniconda\Library\include;%CPATH%"
|
||||
conda config --add channels conda-forge --force
|
||||
conda config --set auto_update_conda false
|
||||
conda install --yes --quiet ninja flang
|
||||
mkdir build
|
||||
cd build
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
|
||||
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER="flang -I C:\Miniconda\Library\include\flang" -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
|
||||
cmake --build . --config Release
|
||||
ctest
|
||||
|
||||
- job: Windows_cl_flang
|
||||
pool:
|
||||
vmImage: 'windows-2022'
|
||||
steps:
|
||||
- script: |
|
||||
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
|
||||
set "LIB=C:\Miniconda\Library\lib;%LIB%"
|
||||
set "CPATH=C:\Miniconda\Library\include;%CPATH%"
|
||||
conda config --add channels conda-forge --force
|
||||
conda config --set auto_update_conda false
|
||||
conda install --yes --quiet ninja flang
|
||||
mkdir build
|
||||
cd build
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
|
||||
cmake -G "Ninja" -DCMAKE_C_COMPILER=cl -DCMAKE_Fortran_COMPILER=flang -DC_LAPACK=1 -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
|
||||
cmake --build . --config Release
|
||||
ctest
|
||||
|
||||
|
||||
|
||||
- job: OSX_OpenMP
|
||||
pool:
|
||||
vmImage: 'macOS-11'
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-10 FC=gfortran-10
|
||||
make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-10 FC=gfortran-10 PREFIX=../blasinst install
|
||||
ls -lR ../blasinst
|
||||
|
||||
- job: OSX_GCC_Nothreads
|
||||
pool:
|
||||
vmImage: 'macOS-11'
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
make USE_THREADS=0 CC=gcc-10 FC=gfortran-10
|
||||
|
||||
- job: OSX_GCC12
|
||||
pool:
|
||||
vmImage: 'macOS-latest'
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
make CC=gcc-12 FC=gfortran-12
|
||||
|
||||
- job: OSX_OpenMP_Clang
|
||||
pool:
|
||||
vmImage: 'macOS-latest'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
brew install llvm libomp
|
||||
make TARGET=CORE2 USE_OPENMP=1 DYNAMIC_ARCH=1 CC=/usr/local/opt/llvm/bin/clang NOFORTRAN=1
|
||||
|
||||
- job: OSX_OpenMP_Clang_cmake
|
||||
pool:
|
||||
vmImage: 'macOS-latest'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
brew install llvm libomp
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DTARGET=CORE2 -DUSE_OPENMP=1 -DINTERFACE64=1 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DNOFORTRAN=1 -DNO_AVX512=1 ..
|
||||
make
|
||||
ctest
|
||||
|
||||
- job: OSX_dynarch_cmake
|
||||
pool:
|
||||
vmImage: 'macOS-11'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
steps:
|
||||
- script: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DDYNAMIC_LIST='NEHALEM HASWELL SKYLAKEX' -DCMAKE_C_COMPILER=gcc-10 -DCMAKE_Fortran_COMPILER=gfortran-10 -DBUILD_SHARED_LIBS=ON ..
|
||||
cmake --build .
|
||||
ctest
|
||||
|
||||
- job: OSX_Ifort_Clang
|
||||
pool:
|
||||
vmImage: 'macOS-latest'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17643/m_HPCKit_p_2021.2.0.2903_offline.dmg
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
MACOS_FORTRAN_COMPONENTS: intel.oneapi.mac.ifort-compiler
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
brew install llvm libomp
|
||||
sudo mkdir -p /opt/intel
|
||||
sudo chown $USER /opt/intel
|
||||
displayName: prepare for cache restore
|
||||
- task: Cache@2
|
||||
inputs:
|
||||
path: /opt/intel/oneapi
|
||||
key: '"install" | "$(MACOS_HPCKIT_URL)" | "$(MACOS_FORTRAN_COMPONENTS)"'
|
||||
cacheHitVar: CACHE_RESTORED
|
||||
- script: |
|
||||
curl --output webimage.dmg --url $(MACOS_HPCKIT_URL) --retry 5 --retry-delay 5
|
||||
hdiutil attach webimage.dmg
|
||||
sudo /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)"/bootstrapper.app/Contents/MacOS/bootstrapper -s --action install --components="$(MACOS_FORTRAN_COMPONENTS)" --eula=accept --continue-with-optional-error=yes --log-dir=.
|
||||
installer_exit_code=$?
|
||||
hdiutil detach /Volumes/"$(basename "$URL" .dmg)" -quiet
|
||||
exit $installer_exit_code
|
||||
displayName: install
|
||||
condition: ne(variables.CACHE_RESTORED, 'true')
|
||||
- script: |
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
make CC=/usr/local/opt/llvm/bin/clang FC=ifort
|
||||
|
||||
- job: OSX_NDK_ARMV7
|
||||
pool:
|
||||
vmImage: 'macOS-11'
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
brew install --cask android-ndk
|
||||
export ANDROID_NDK_HOME=/usr/local/share/android-ndk
|
||||
make TARGET=ARMV7 ONLY_CBLAS=1 CC=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi21-clang AR=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/darwin-x86_64/bin/llvm-ar HOSTCC=gcc ARM_SOFTFP_ABI=1 -j4
|
||||
|
||||
- job: OSX_IOS_ARMV8
|
||||
pool:
|
||||
vmImage: 'macOS-11'
|
||||
variables:
|
||||
CC: /Applications/Xcode_12.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||
CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_12.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS14.4.sdk -arch arm64 -miphoneos-version-min=10.0
|
||||
steps:
|
||||
- script: |
|
||||
make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
|
||||
|
||||
- job: OSX_IOS_ARMV7
|
||||
pool:
|
||||
vmImage: 'macOS-11'
|
||||
variables:
|
||||
CC: /Applications/Xcode_12.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||
CFLAGS: -O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode_12.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS14.4.sdk -arch armv7 -miphoneos-version-min=5.1
|
||||
steps:
|
||||
- script: |
|
||||
make TARGET=ARMV7 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
|
||||
|
||||
- job: OSX_xbuild_DYNAMIC_ARM64
|
||||
pool:
|
||||
vmImage: 'macOS-11'
|
||||
variables:
|
||||
CC: /Applications/Xcode_12.5.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||
CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX11.3.sdk -arch arm64
|
||||
steps:
|
||||
- script: |
|
||||
ls /Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs
|
||||
/Applications/Xcode_12.5.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -arch arm64 --print-supported-cpus
|
||||
/Applications/Xcode_11.7.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang --version
|
||||
make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
|
||||
|
||||
- job: ALPINE_MUSL
|
||||
pool:
|
||||
vmImage: 'ubuntu-latest'
|
||||
steps:
|
||||
- script: |
|
||||
wget https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.14.0/alpine-chroot-install \
|
||||
&& echo 'ccbf65f85cdc351851f8ad025bb3e65bae4d5b06 alpine-chroot-install' | sha1sum -c \
|
||||
|| exit 1
|
||||
alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
|
||||
sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers sudo'
|
||||
alpine make DYNAMIC_ARCH=1 BINARY=64
|
||||
alpine make DYNAMIC_ARCH=1 BINARY=64 PREFIX=mytestdir install
|
||||
alpine ls -l mytestdir/include
|
||||
alpine echo "// tests that inclusion of openblas_config.h works with musl" >test_install.c
|
||||
alpine echo "#include <openblas_config.h>" >>test_install.c
|
||||
alpine echo "int main(){" >> test_install.c
|
||||
alpine echo "cpu_set_t* cpu_set = NULL;}" >>test_install.c
|
||||
alpine gcc -Imytestdir/include test_install.c -Lmytestdir/lib -lopenblas -lpthread -lgfortran -o test_install
|
||||
|
||||
1367
benchmark/Makefile
1367
benchmark/Makefile
File diff suppressed because it is too large
Load Diff
133
benchmark/amax.c
133
benchmark/amax.c
@@ -1,133 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef AMAX
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define AMAX BLASFUNC(dzamax)
|
||||
#else
|
||||
#define AMAX BLASFUNC(scamax)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define AMAX BLASFUNC(damax)
|
||||
#else
|
||||
#define AMAX BLASFUNC(samax)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x = 1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1, timeg;
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
|
||||
if (argc > 0)
|
||||
{
|
||||
from = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0)
|
||||
{
|
||||
to = MAX(atol(*argv), from);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0)
|
||||
{
|
||||
step = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||
loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX")))
|
||||
inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
|
||||
|
||||
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
|
||||
{
|
||||
fprintf(stderr, "Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for (m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg = 0;
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l = 0; l < loops; l++)
|
||||
{
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
|
||||
{
|
||||
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
AMAX(&m, x, &inc_x);
|
||||
end();
|
||||
timeg += getsec();
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
137
benchmark/amin.c
137
benchmark/amin.c
@@ -1,137 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef AMIN
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define AMIN BLASFUNC(dzamin)
|
||||
#else
|
||||
#define AMIN BLASFUNC(scamin)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define AMIN BLASFUNC(damin)
|
||||
#else
|
||||
#define AMIN BLASFUNC(samin)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x = 1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1, timeg;
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
|
||||
if (argc > 0)
|
||||
{
|
||||
from = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0)
|
||||
{
|
||||
to = MAX(atol(*argv), from);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0)
|
||||
{
|
||||
step = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||
loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX")))
|
||||
inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
|
||||
|
||||
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
|
||||
{
|
||||
fprintf(stderr, "Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for (m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg = 0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l = 0; l < loops; l++)
|
||||
{
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
|
||||
{
|
||||
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
AMIN(&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
|
||||
timeg += getsec();
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
183
benchmark/asum.c
183
benchmark/asum.c
@@ -25,108 +25,169 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef ASUM
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define ASUM BLASFUNC(dzasum)
|
||||
#define ASUM BLASFUNC(dzasum)
|
||||
#else
|
||||
#define ASUM BLASFUNC(scasum)
|
||||
#define ASUM BLASFUNC(scasum)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define ASUM BLASFUNC(dasum)
|
||||
#define ASUM BLASFUNC(dasum)
|
||||
#else
|
||||
#define ASUM BLASFUNC(sasum)
|
||||
#define ASUM BLASFUNC(sasum)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
FLOAT result;
|
||||
blasint m, i;
|
||||
blasint inc_x = 1;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
double time1, timeg;
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
if (argc > 0)
|
||||
{
|
||||
from = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0)
|
||||
{
|
||||
to = MAX(atol(*argv), from);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0)
|
||||
{
|
||||
step = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||
loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX")))
|
||||
inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
|
||||
|
||||
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
|
||||
{
|
||||
fprintf(stderr, "Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for (m = from; m <= to; m += step)
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg = 0;
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l = 0; l < loops; l++)
|
||||
{
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
|
||||
{
|
||||
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
result = ASUM(&m, x, &inc_x);
|
||||
end();
|
||||
timeg += getsec();
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = ASUM (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
if (loops > 1)
|
||||
timeg /= loops;
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
|
||||
fprintf(stderr, " %10.2f MFlops\n", 4. * (double)m / timeg * 1.e-6);
|
||||
#else
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
fprintf(stderr, " %10.2f MFlops\n", 2. * (double)m / timeg * 1.e-6);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -1,124 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef AXPBY
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define AXPBY BLASFUNC(zaxpby)
|
||||
#else
|
||||
#define AXPBY BLASFUNC(caxpby)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define AXPBY BLASFUNC(daxpby)
|
||||
#else
|
||||
#define AXPBY BLASFUNC(saxpby)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
FLOAT beta[2] = {2.0, 2.0};
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
begin();
|
||||
AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y );
|
||||
end();
|
||||
timeg += getsec();
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
(COMPSIZE * COMPSIZE * 4. - COMPSIZE) * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef AXPY
|
||||
|
||||
@@ -43,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
@@ -56,6 +127,8 @@ int main(int argc, char *argv[]){
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -78,7 +151,7 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -102,13 +175,13 @@ int main(int argc, char *argv[]){
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
@@ -117,8 +190,8 @@ int main(int argc, char *argv[]){
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.9f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -1,134 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#elif defined(__APPLE__)
|
||||
#include <mach/mach_time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
/* Benchmarks should allocate with cacheline (often 64 bytes) alignment
|
||||
to avoid unreliable results. This technique, storing the allocated
|
||||
pointer value just before the aligned memory, doesn't require
|
||||
C11's aligned_alloc for compatibility with older compilers. */
|
||||
static void *aligned_alloc_cacheline(size_t n)
|
||||
{
|
||||
void *p = malloc((size_t)(void *) + n + L1_DATA_LINESIZE - 1);
|
||||
if (p) {
|
||||
void **newp = (void **)
|
||||
(((uintptr_t)p + L1_DATA_LINESIZE) & (uintptr_t)-L1_DATA_LINESIZE);
|
||||
newp[-1] = p;
|
||||
p = newp;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
#define malloc aligned_alloc_cacheline
|
||||
#define free(p) free((p) ? ((void **)(p))[-1] : (p))
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||
struct timeval start, stop;
|
||||
#elif defined(__APPLE__)
|
||||
mach_timebase_info_data_t info;
|
||||
uint64_t start = 0, stop = 0;
|
||||
#else
|
||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
||||
#endif
|
||||
|
||||
double getsec()
|
||||
{
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
#elif defined(__APPLE__)
|
||||
mach_timebase_info(&info);
|
||||
return (double)(((stop - start) * info.numer)/info.denom) * 1.e-9;
|
||||
#else
|
||||
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
|
||||
#endif
|
||||
}
|
||||
|
||||
void begin() {
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
#elif defined(__APPLE__)
|
||||
start = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
|
||||
#else
|
||||
clock_gettime(CLOCK_REALTIME, &start);
|
||||
#endif
|
||||
}
|
||||
|
||||
void end() {
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
#elif defined(__APPLE__)
|
||||
stop = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
|
||||
#else
|
||||
clock_gettime(CLOCK_REALTIME, &stop);
|
||||
#endif
|
||||
}
|
||||
@@ -36,7 +36,12 @@
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
@@ -66,6 +71,41 @@ double fabs(double);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
static __inline double getmflops(int ratio, int m, double secs){
|
||||
|
||||
double mm = (double)m;
|
||||
@@ -105,6 +145,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT maxerr;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -132,46 +173,46 @@ int main(int argc, char *argv[]){
|
||||
#ifndef COMPLEX
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
for(i = 0; i < j; i++) a[i + j * m] = 0.;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
}
|
||||
|
||||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -179,31 +220,29 @@ int main(int argc, char *argv[]){
|
||||
|
||||
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRF(uplo[uplos], &m, b, &m, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
maxerr = 0.;
|
||||
|
||||
if (!(uplos & 1)) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i <= j; i++) {
|
||||
#ifndef COMPLEX
|
||||
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
|
||||
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
|
||||
if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
|
||||
#else
|
||||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
|
||||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
|
||||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
|
||||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
|
||||
if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
|
||||
if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -211,13 +250,10 @@ int main(int argc, char *argv[]){
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = j; i < m; i++) {
|
||||
#ifndef COMPLEX
|
||||
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
|
||||
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
|
||||
if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
|
||||
#else
|
||||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
|
||||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
|
||||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
|
||||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
|
||||
if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
|
||||
if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
118
benchmark/copy.c
118
benchmark/copy.c
@@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef COPY
|
||||
|
||||
@@ -43,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
@@ -57,9 +128,8 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1 = 0.0, timeg = 0.0;
|
||||
long nanos = 0;
|
||||
time_t seconds = 0;
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
@@ -81,7 +151,7 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -93,27 +163,35 @@ int main(int argc, char *argv[]){
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
begin();
|
||||
COPY (&m, x, &inc_x, y, &inc_y );
|
||||
end();
|
||||
timeg += getsec();
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes %12.9f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg / 1.e6, timeg);
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
COPY (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -25,16 +25,89 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef DOT
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define DOT BLASFUNC(ddot)
|
||||
#else
|
||||
#define DOT BLASFUNC(sdot)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
@@ -49,6 +122,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -71,7 +145,7 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -95,20 +169,23 @@ int main(int argc, char *argv[]){
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = DOT (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
end();
|
||||
timeg += getsec();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -36,7 +36,13 @@
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEEV
|
||||
|
||||
@@ -68,6 +74,71 @@ extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a,
|
||||
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info );
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
|
||||
@@ -83,6 +154,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -123,7 +195,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,7 +214,7 @@ int main(int argc, char *argv[]){
|
||||
}
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -151,7 +223,7 @@ int main(int argc, char *argv[]){
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
lwork = -1;
|
||||
#ifndef COMPLEX
|
||||
@@ -167,14 +239,14 @@ int main(int argc, char *argv[]){
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "failed to compute eigenvalues .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops : %10.2f Sec : %d\n",
|
||||
|
||||
221
benchmark/gemm.c
221
benchmark/gemm.c
@@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEMM
|
||||
|
||||
@@ -33,8 +39,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define GEMM BLASFUNC(dgemm)
|
||||
#elif defined(HALF)
|
||||
#define GEMM BLASFUNC(sbgemm)
|
||||
#else
|
||||
#define GEMM BLASFUNC(sgemm)
|
||||
#endif
|
||||
@@ -49,127 +53,168 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
IFLOAT *a, *b;
|
||||
FLOAT *c;
|
||||
FLOAT alpha[] = {1.0, 0.0};
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
char transa = 'N';
|
||||
char transb = 'N';
|
||||
blasint m, n, k, i, j, lda, ldb, ldc;
|
||||
char trans='N';
|
||||
blasint m, n, i, j;
|
||||
int loops = 1;
|
||||
int has_param_m = 0;
|
||||
int has_param_n = 0;
|
||||
int has_param_k = 0;
|
||||
int has_param_n=0;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1, timeg;
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++; }
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; }
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++; }
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) {
|
||||
transa=*p;
|
||||
transb=*p;
|
||||
}
|
||||
if ((p = getenv("OPENBLAS_TRANSA"))) {
|
||||
transa=*p;
|
||||
}
|
||||
if ((p = getenv("OPENBLAS_TRANSB"))) {
|
||||
transb=*p;
|
||||
}
|
||||
TOUPPER(transa);
|
||||
TOUPPER(transb);
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step=%d : Transa=%c : Transb=%c\n", from, to, step, transa, transb);
|
||||
fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c\n", from, to, step, trans);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if ( p != NULL ) {
|
||||
loops = atoi(p);
|
||||
}
|
||||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
if ((p = getenv("OPENBLAS_PARAM_M"))) {
|
||||
m = atoi(p);
|
||||
has_param_m=1;
|
||||
} else {
|
||||
m = to;
|
||||
}
|
||||
if ((p = getenv("OPENBLAS_PARAM_N"))) {
|
||||
n = atoi(p);
|
||||
has_param_n=1;
|
||||
} else {
|
||||
n = to;
|
||||
}
|
||||
if ((p = getenv("OPENBLAS_PARAM_K"))) {
|
||||
k = atoi(p);
|
||||
has_param_k=1;
|
||||
} else {
|
||||
k = to;
|
||||
n = atoi(p);
|
||||
has_param_n=1;
|
||||
}
|
||||
|
||||
if (( a = (IFLOAT *)malloc(sizeof(IFLOAT) * m * k * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
if (( b = (IFLOAT *)malloc(sizeof(IFLOAT) * k * n * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < m * k * COMPSIZE; i++) {
|
||||
a[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
for (i = 0; i < k * n * COMPSIZE; i++) {
|
||||
b[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
for (i = 0; i < m * n * COMPSIZE; i++) {
|
||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for (i = from; i <= to; i += step) {
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
if (!has_param_m) { m = i; }
|
||||
if (!has_param_n) { n = i; }
|
||||
if (!has_param_k) { k = i; }
|
||||
if ( has_param_n == 1 && n <= m )
|
||||
n=n;
|
||||
else
|
||||
n=m;
|
||||
|
||||
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
if (transa == 'N') { lda = m; }
|
||||
else { lda = k; }
|
||||
if (transb == 'N') { ldb = k; }
|
||||
else { ldb = n; }
|
||||
ldc = m;
|
||||
|
||||
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
|
||||
begin();
|
||||
|
||||
for (j=0; j<loops; j++) {
|
||||
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
|
||||
}
|
||||
|
||||
end();
|
||||
time1 = getsec();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg = time1/loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)k * (double)m * (double)n / timeg * 1.e-6, time1);
|
||||
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEMM
|
||||
|
||||
@@ -47,6 +53,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
@@ -62,6 +133,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -91,7 +163,7 @@ int main(int argc, char *argv[]){
|
||||
loops = atoi(p);
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -109,18 +181,22 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
end();
|
||||
timeg += getsec();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
102
benchmark/gemv.c
102
benchmark/gemv.c
@@ -25,7 +25,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEMV
|
||||
@@ -47,11 +52,77 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 0.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char trans='N';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
@@ -66,6 +137,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -109,7 +181,7 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -125,7 +197,7 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -136,20 +208,20 @@ int main(int argc, char *argv[]){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
end();
|
||||
time1 = getsec();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
}
|
||||
@@ -162,7 +234,7 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -173,20 +245,20 @@ int main(int argc, char *argv[]){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
for(i = 0; i < n * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
end();
|
||||
time1 = getsec();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GER
|
||||
|
||||
@@ -43,6 +49,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
@@ -59,6 +131,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -92,7 +165,7 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -109,7 +182,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -125,13 +198,16 @@ int main(int argc, char *argv[]){
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m);
|
||||
|
||||
end();
|
||||
|
||||
timeg += getsec();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
@@ -36,7 +36,12 @@
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
@@ -61,6 +66,71 @@ double fabs(double);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
@@ -72,6 +142,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -94,7 +165,7 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -106,35 +177,39 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = 0.0;
|
||||
b[i + j * m * COMPSIZE] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (j = 0; j < m; ++j) {
|
||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
|
||||
b[i] += a[i + j * m * COMPSIZE];
|
||||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GESV (&m, &m, a, &m, ipiv, b, &m, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
fprintf(stderr,
|
||||
"%10.2f MFlops %10.6f s\n",
|
||||
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -36,7 +36,12 @@
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#undef GETRF
|
||||
#undef GETRI
|
||||
@@ -67,22 +72,84 @@
|
||||
|
||||
extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info);
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*work;
|
||||
FLOAT wkopt[4];
|
||||
blasint *ipiv;
|
||||
blasint m, i, j, l, info,lwork;
|
||||
blasint m, i, j, info,lwork;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
int loops = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
char *p;
|
||||
char btest = 'I';
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
@@ -90,9 +157,6 @@ int main(int argc, char *argv[]){
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_TEST"))) btest=*p;
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);
|
||||
|
||||
@@ -108,7 +172,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -124,48 +188,39 @@ int main(int argc, char *argv[]){
|
||||
}
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE FLops Time Lwork\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
timeg = 0.;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
|
||||
if (btest == 'F') begin();
|
||||
GETRF (&m, &m, a, &m, ipiv, &info);
|
||||
if (btest == 'F') {
|
||||
end();
|
||||
timeg += getsec();
|
||||
}
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (btest == 'I') begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
lwork = -1;
|
||||
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
|
||||
if (btest == 'I') end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (btest == 'I')
|
||||
timeg += getsec();
|
||||
|
||||
} // loops
|
||||
time1 = timeg/(double)loops;
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops : %10.2f Sec : %d\n",
|
||||
COMPSIZE * COMPSIZE * (4.0/3.0 * (double)m * (double)m *(double)m - (double)m *(double)m + 5.0/3.0* (double)m) / time1 * 1.e-6,time1,lwork);
|
||||
|
||||
134
benchmark/hbmv.c
134
benchmark/hbmv.c
@@ -1,134 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef HBMV
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HBMV BLASFUNC(zhbmv)
|
||||
#else
|
||||
#define HBMV BLASFUNC(chbmv)
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
blasint k = 1;
|
||||
char uplo='L';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1, inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_K"))) k = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' k = %d Inc_x = %d Inc_y = %d Loops = %d\n",
|
||||
from, to, step, uplo, k, inc_x, inc_y, loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step) {
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)m);
|
||||
|
||||
for(j = 0; j < m; j++) {
|
||||
for(i = 0; i < m * COMPSIZE; i++) {
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
end();
|
||||
|
||||
timeg += getsec();
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)(2 * k + 1) * (double)m / timeg * 1.e-6);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HEMM
|
||||
|
||||
@@ -35,6 +41,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define HEMM BLASFUNC(chemm)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
@@ -54,6 +126,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -78,7 +151,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -91,19 +164,21 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
|
||||
@@ -25,16 +25,89 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HEMV
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HEMV BLASFUNC(zhemv)
|
||||
#else
|
||||
#define HEMV BLASFUNC(chemv)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
@@ -51,6 +124,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -78,7 +152,7 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -93,7 +167,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -108,13 +182,13 @@ int main(int argc, char *argv[]){
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
||||
109
benchmark/her.c
109
benchmark/her.c
@@ -1,109 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef HER
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HER BLASFUNC(zher)
|
||||
#else
|
||||
#define HER BLASFUNC(cher)
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
blasint incx = 1;
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
char trans='N';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
HER (&uplo, &m, alpha, x, &incx, a, &m );
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
113
benchmark/her2.c
113
benchmark/her2.c
@@ -1,113 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2020, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef HER2
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HER2 BLASFUNC(zher2)
|
||||
#else
|
||||
#define HER2 BLASFUNC(cher2)
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
blasint inc = 1;
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
char trans='N';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m );
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HER2K
|
||||
#ifdef DOUBLE
|
||||
@@ -34,6 +40,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define HER2K BLASFUNC(cher2k)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
@@ -53,6 +125,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -77,7 +150,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -90,19 +163,21 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
|
||||
@@ -25,16 +25,89 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HERK
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HERK BLASFUNC(zherk)
|
||||
#else
|
||||
#define HERK BLASFUNC(cherk)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *c;
|
||||
@@ -54,6 +127,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -75,7 +149,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -88,22 +162,25 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
133
benchmark/hpmv.c
133
benchmark/hpmv.c
@@ -1,133 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef HPMV
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HPMV BLASFUNC(zhpmv)
|
||||
#else
|
||||
#define HPMV BLASFUNC(chpmv)
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char uplo='L';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1, inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step) {
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)m);
|
||||
|
||||
for(j = 0; j < m; j++) {
|
||||
for(i = 0; i < m * COMPSIZE; i++) {
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -1,120 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef IAMAX
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define IAMAX BLASFUNC(izamax)
|
||||
#else
|
||||
#define IAMAX BLASFUNC(icamax)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define IAMAX BLASFUNC(idamax)
|
||||
#else
|
||||
#define IAMAX BLASFUNC(isamax)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
IAMAX (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -1,120 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef IAMIN
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define IAMIN BLASFUNC(izamin)
|
||||
#else
|
||||
#define IAMIN BLASFUNC(icamin)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define IAMIN BLASFUNC(idamin)
|
||||
#else
|
||||
#define IAMIN BLASFUNC(isamin)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
IAMIN (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
114
benchmark/imax.c
114
benchmark/imax.c
@@ -1,114 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef IMAX
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define IMAX BLASFUNC(idmax)
|
||||
#else
|
||||
#define IMAX BLASFUNC(ismax)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
IMAX (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
114
benchmark/imin.c
114
benchmark/imin.c
@@ -1,114 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef IMIN
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define IMIN BLASFUNC(idmin)
|
||||
#else
|
||||
#define IMIN BLASFUNC(ismin)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
IMIN (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -36,7 +36,12 @@
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
@@ -67,26 +72,88 @@ double fabs(double);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
blasint *ipiv;
|
||||
|
||||
blasint m, i, j, l, info;
|
||||
blasint m, i, j, info;
|
||||
blasint unit = 1;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
int loops = 1;
|
||||
|
||||
FLOAT maxerr;
|
||||
|
||||
double time1, time2, timeg1,timeg2;
|
||||
struct timeval start, stop;
|
||||
double time1, time2;
|
||||
|
||||
char *p;
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
@@ -107,19 +174,19 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Residual Decompose Solve Total\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
timeg1 = timeg2 = 0.;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
for (l = 0; l < loops; l++) {
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -127,38 +194,36 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for (j = 0; j < m; ++j) {
|
||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
|
||||
b[i] += a[i + j * m * COMPSIZE];
|
||||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GETRF (&m, &m, a, &m, ipiv, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
timeg1 += getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
timeg2 += getsec();
|
||||
} //loops
|
||||
time1=timeg1/(double)loops;
|
||||
time2=timeg2/(double)loops;
|
||||
time2 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
maxerr = 0.;
|
||||
|
||||
for(i = 0; i < m; i++){
|
||||
|
||||
113
benchmark/max.c
113
benchmark/max.c
@@ -1,113 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef NAMAX
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define NAMAX BLASFUNC(dmax)
|
||||
#else
|
||||
#define NAMAX BLASFUNC(smax)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
NAMAX (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
113
benchmark/min.c
113
benchmark/min.c
@@ -1,113 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef NAMIN
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define NAMIN BLASFUNC(dmin)
|
||||
#else
|
||||
#define NAMIN BLASFUNC(smin)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
NAMIN (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
121
benchmark/nrm2.c
121
benchmark/nrm2.c
@@ -1,121 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef NRM2
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define NRM2 BLASFUNC(dznrm2)
|
||||
#else
|
||||
#define NRM2 BLASFUNC(scnrm2)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define NRM2 BLASFUNC(dnrm2)
|
||||
#else
|
||||
#define NRM2 BLASFUNC(snrm2)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
NRM2 (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -36,7 +36,12 @@
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
@@ -81,7 +86,37 @@ double fabs(double);
|
||||
// extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info);
|
||||
// extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info);
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
@@ -99,15 +134,15 @@ int main(int argc, char *argv[]){
|
||||
char *p;
|
||||
char btest = 'F';
|
||||
|
||||
blasint m, i, j, l, info, uplos=0;
|
||||
double flops = 0.;
|
||||
blasint m, i, j, info, uplos=0;
|
||||
double flops;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
int loops = 1;
|
||||
|
||||
double time1, timeg;
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
@@ -120,8 +155,6 @@ int main(int argc, char *argv[]){
|
||||
|
||||
if ((p = getenv("OPENBLAS_TEST"))) btest=*p;
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c\n", from, to, step,*uplo[uplos]);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
@@ -132,53 +165,51 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
timeg=0.;
|
||||
for (l = 0; l < loops; l++) {
|
||||
|
||||
#ifndef COMPLEX
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
for(i = 0; i < j; i++) a[i + j * m] = 0.;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.;
|
||||
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
}
|
||||
|
||||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (j = 0; j < m; j++) {
|
||||
for(i = 0; i < j; i++) {
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
|
||||
a[(j + j * m) * 2 + 1] = 0.;
|
||||
|
||||
for(i = j + 1; i < m; i++) {
|
||||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
|
||||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
|
||||
a[(i + j * m) * 2 + 0] = 0.;
|
||||
a[(i + j * m) * 2 + 1] = 0.;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -186,19 +217,19 @@ int main(int argc, char *argv[]){
|
||||
|
||||
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRF(uplo[uplos], &m, b, &m, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Potrf info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if ( btest == 'F')
|
||||
timeg += getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
|
||||
|
||||
if ( btest == 'S' )
|
||||
{
|
||||
@@ -209,43 +240,39 @@ int main(int argc, char *argv[]){
|
||||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Potrs info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
timeg += getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
|
||||
|
||||
}
|
||||
|
||||
if ( btest == 'I' )
|
||||
{
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRI(uplo[uplos], &m, b, &m, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Potri info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
timeg += getsec();
|
||||
}
|
||||
} // loops
|
||||
|
||||
time1 = timeg/(double)loops;
|
||||
if ( btest == 'F')
|
||||
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
|
||||
if ( btest == 'S')
|
||||
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
|
||||
if ( btest == 'I')
|
||||
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%8d : %10.2f MFlops : %10.3f Sec : Test=%c\n",m,flops ,time1,btest);
|
||||
|
||||
|
||||
|
||||
134
benchmark/rot.c
134
benchmark/rot.c
@@ -1,134 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef ROT
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define ROT BLASFUNC(drot)
|
||||
#else
|
||||
#define ROT BLASFUNC(srot)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define ROT BLASFUNC(zdrot)
|
||||
#else
|
||||
#define ROT BLASFUNC(csrot)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
// FLOAT result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
FLOAT c[1] = { 2.0 };
|
||||
FLOAT s[1] = { 2.0 };
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
begin();
|
||||
|
||||
ROT (&m, x, &inc_x, y, &inc_y, c, s);
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
138
benchmark/rotm.c
138
benchmark/rotm.c
@@ -1,138 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
|
||||
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
||||
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef ROTM
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define ROTM BLASFUNC(drotm)
|
||||
#else
|
||||
#define ROTM BLASFUNC(srotm)
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
FLOAT *x, *y;
|
||||
// FLOAT result;
|
||||
blasint m, i;
|
||||
blasint inc_x = 1, inc_y = 1;
|
||||
FLOAT param[5] = {1, 2.0, 3.0, 4.0, 5.0};
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
|
||||
double time1, timeg;
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
|
||||
if (argc > 0) {
|
||||
from = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0) {
|
||||
to = MAX(atol(*argv), from);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0) {
|
||||
step = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||
loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX")))
|
||||
inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY")))
|
||||
inc_y = atoi(p);
|
||||
|
||||
fprintf(
|
||||
stderr,
|
||||
"From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n",
|
||||
from, to, step, inc_x, inc_y, loops);
|
||||
|
||||
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) ==
|
||||
NULL) {
|
||||
fprintf(stderr, "Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if ((y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) ==
|
||||
NULL) {
|
||||
fprintf(stderr, "Out of Memory!!\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for (m = from; m <= to; m += step) {
|
||||
|
||||
timeg = 0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
|
||||
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
|
||||
y[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
begin();
|
||||
|
||||
ROTM(&m, x, &inc_x, y, &inc_y, param);
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
112
benchmark/scal.c
112
benchmark/scal.c
@@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SCAL
|
||||
|
||||
@@ -43,9 +49,74 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
@@ -57,6 +128,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -74,7 +146,11 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -87,25 +163,35 @@ int main(int argc, char *argv[]){
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SCAL (&m, alpha, x, &inc_x);
|
||||
}
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
timeg = time1 / loops;
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 6. * (double)m / timeg * 1.e-6, timeg);
|
||||
fprintf(stderr, " %10.2f MFlops\n", 6. * (double)m / timeg * 1.e-6);
|
||||
#else
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
fprintf(stderr, " %10.2f MFlops\n", 1. * (double)m / timeg * 1.e-6);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
@@ -2,47 +2,61 @@
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
if (!is.null(options("matprod")[[1]])) options(matprod = "blas")
|
||||
nfrom = 128
|
||||
nto = 2048
|
||||
nstep = 128
|
||||
loops = 1
|
||||
|
||||
nfrom <- 128
|
||||
nto <- 2048
|
||||
nstep <- 128
|
||||
loops <- 1
|
||||
if ( length(argv) > 0 ) {
|
||||
|
||||
for ( z in 1:length(argv) ) {
|
||||
|
||||
if ( z == 1 ) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if ( z==2 ) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if ( z==3 ) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if ( z==4 ) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
if (length(argv) > 0) {
|
||||
for (z in 1:length(argv)) {
|
||||
if (z == 1) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if (z == 2) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if (z == 3) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if (z == 4) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
p <- Sys.getenv("OPENBLAS_LOOPS")
|
||||
if (p != "") {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
||||
if ( p != "" ) {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n", nfrom, nto, nstep, loops))
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n <- nfrom
|
||||
while (n <= nto) {
|
||||
A <- matrix(rnorm(n * n), nrow = n)
|
||||
ev <- 0
|
||||
z <- system.time(for (l in 1:loops) {
|
||||
ev <- eigen(A)
|
||||
})
|
||||
n = nfrom
|
||||
while ( n <= nto ) {
|
||||
|
||||
mflops <- (26.66 * n * n * n) * loops / (z[3] * 1e+06)
|
||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
|
||||
l = 1
|
||||
|
||||
st <- sprintf("%.0fx%.0f :", n, n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
|
||||
start <- proc.time()[3]
|
||||
|
||||
while ( l <= loops ) {
|
||||
|
||||
ev <- eigen(A)
|
||||
l = l + 1
|
||||
}
|
||||
|
||||
end <- proc.time()[3]
|
||||
timeg = end - start
|
||||
mflops = (26.66 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
||||
|
||||
st = sprintf("%.0fx%.0f :",n , n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
||||
|
||||
n = n + nstep
|
||||
|
||||
n <- n + nstep
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -2,50 +2,62 @@
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
if (!is.null(options("matprod")[[1]])) options(matprod = "blas")
|
||||
nfrom = 128
|
||||
nto = 2048
|
||||
nstep = 128
|
||||
loops = 1
|
||||
|
||||
nfrom <- 128
|
||||
nto <- 2048
|
||||
nstep <- 128
|
||||
loops <- 1
|
||||
if ( length(argv) > 0 ) {
|
||||
|
||||
for ( z in 1:length(argv) ) {
|
||||
|
||||
if ( z == 1 ) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if ( z==2 ) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if ( z==3 ) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if ( z==4 ) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
if (length(argv) > 0) {
|
||||
for (z in 1:length(argv)) {
|
||||
if (z == 1) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if (z == 2) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if (z == 3) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if (z == 4) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
p <- Sys.getenv("OPENBLAS_LOOPS")
|
||||
if (p != "") {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
||||
if ( p != "" ) {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n", nfrom, nto, nstep, loops))
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n <- nfrom
|
||||
while (n <= nto) {
|
||||
A <- matrix(runif(n * n), nrow = n)
|
||||
B <- matrix(runif(n * n), nrow = n)
|
||||
C <- 1
|
||||
n = nfrom
|
||||
while ( n <= nto ) {
|
||||
|
||||
z <- system.time(for (l in 1:loops) {
|
||||
C <- A %*% B
|
||||
l <- l + 1
|
||||
})
|
||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
|
||||
l = 1
|
||||
|
||||
mflops <- (2.0 * n * n * n) * loops / (z[3] * 1e+06)
|
||||
start <- proc.time()[3]
|
||||
|
||||
st <- sprintf("%.0fx%.0f :", n, n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
|
||||
while ( l <= loops ) {
|
||||
|
||||
C <- A %*% B
|
||||
l = l + 1
|
||||
}
|
||||
|
||||
end <- proc.time()[3]
|
||||
timeg = end - start
|
||||
mflops = ( 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
||||
|
||||
st = sprintf("%.0fx%.0f :",n , n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
||||
|
||||
n = n + nstep
|
||||
|
||||
n <- n + nstep
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -2,48 +2,62 @@
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
if (!is.null(options("matprod")[[1]])) options(matprod = "blas")
|
||||
nfrom = 128
|
||||
nto = 2048
|
||||
nstep = 128
|
||||
loops = 1
|
||||
|
||||
nfrom <- 128
|
||||
nto <- 2048
|
||||
nstep <- 128
|
||||
loops <- 1
|
||||
if ( length(argv) > 0 ) {
|
||||
|
||||
for ( z in 1:length(argv) ) {
|
||||
|
||||
if ( z == 1 ) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if ( z==2 ) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if ( z==3 ) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if ( z==4 ) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
if (length(argv) > 0) {
|
||||
for (z in 1:length(argv)) {
|
||||
if (z == 1) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if (z == 2) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if (z == 3) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if (z == 4) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
p <- Sys.getenv("OPENBLAS_LOOPS")
|
||||
if (p != "") {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
||||
if ( p != "" ) {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n", nfrom, nto, nstep, loops))
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n <- nfrom
|
||||
while (n <= nto) {
|
||||
A <- matrix(rnorm(n * n), nrow = n)
|
||||
B <- matrix(rnorm(n * n), nrow = n)
|
||||
n = nfrom
|
||||
while ( n <= nto ) {
|
||||
|
||||
z <- system.time(for (l in 1:loops) {
|
||||
solve(A, B)
|
||||
})
|
||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
|
||||
l = 1
|
||||
|
||||
mflops <- (8.0 / 3 * n * n * n) * loops / (z[3] * 1e+06)
|
||||
start <- proc.time()[3]
|
||||
|
||||
st <- sprintf("%.0fx%.0f :", n, n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
|
||||
while ( l <= loops ) {
|
||||
|
||||
solve(A,B)
|
||||
l = l + 1
|
||||
}
|
||||
|
||||
end <- proc.time()[3]
|
||||
timeg = end - start
|
||||
mflops = (2.0/3.0 *n*n*n + 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
||||
|
||||
st = sprintf("%.0fx%.0f :",n , n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
||||
|
||||
n = n + nstep
|
||||
|
||||
n <- n + nstep
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy import zeros
|
||||
from numpy.random import randn
|
||||
from scipy.linalg import blas
|
||||
|
||||
|
||||
def run_dsyrk(N, l):
|
||||
|
||||
A = randn(N, N).astype('float64', order='F')
|
||||
C = zeros((N, N), dtype='float64', order='F')
|
||||
|
||||
start = time.time()
|
||||
for i in range(0, l):
|
||||
blas.dsyrk(1.0, A, c=C, overwrite_c=True)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end - start)
|
||||
mflops = (N * N * N) * l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N, N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N = 128
|
||||
NMAX = 2048
|
||||
NINC = 128
|
||||
LOOPS = 1
|
||||
|
||||
z = 0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p)
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range(N, NMAX + NINC, NINC):
|
||||
run_dsyrk(i, LOOPS)
|
||||
@@ -1,58 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy import zeros
|
||||
from numpy.random import randn
|
||||
from scipy.linalg import blas
|
||||
|
||||
|
||||
def run_ssyrk(N, l):
|
||||
|
||||
A = randn(N, N).astype('float32', order='F')
|
||||
C = zeros((N, N), dtype='float32', order='F')
|
||||
|
||||
start = time.time()
|
||||
for i in range(0, l):
|
||||
blas.ssyrk(1.0, A, c=C, overwrite_c=True)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end - start)
|
||||
mflops = (N * N * N) * l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N, N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N = 128
|
||||
NMAX = 2048
|
||||
NINC = 128
|
||||
LOOPS = 1
|
||||
|
||||
z = 0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p)
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range(N, NMAX + NINC, NINC):
|
||||
run_ssyrk(i, LOOPS)
|
||||
@@ -1,197 +0,0 @@
|
||||
// run with OPENBLAS_NUM_THREADS=1 and OMP_NUM_THREADS=n
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <cblas.h>
|
||||
#include <omp.h>
|
||||
#include <pthread.h>
|
||||
#define MIN_SIZE 5
|
||||
#define MAX_SIZE 60
|
||||
#define NB_SIZE 10
|
||||
|
||||
// number of loop for a 1x1 matrix. Lower it if the test is
|
||||
// too slow on you computer.
|
||||
#define NLOOP 2e7
|
||||
|
||||
typedef struct {
|
||||
int matrix_size;
|
||||
int n_loop;
|
||||
void (* bench_func)();
|
||||
void (* blas_func)();
|
||||
void * (* create_matrix)(int size);
|
||||
} BenchParam;
|
||||
|
||||
void * s_create_matrix(int size) {
|
||||
float * r = malloc(size * sizeof(double));
|
||||
int i;
|
||||
for(i = 0; i < size; i++)
|
||||
r[i] = 1e3 * i / size;
|
||||
return r;
|
||||
}
|
||||
|
||||
void * c_create_matrix(int size) {
|
||||
float * r = malloc(size * 2 * sizeof(double));
|
||||
int i;
|
||||
for(i = 0; i < 2 * size; i++)
|
||||
r[i] = 1e3 * i / size;
|
||||
return r;
|
||||
}
|
||||
|
||||
void * z_create_matrix(int size) {
|
||||
double * r = malloc(size * 2 * sizeof(double));
|
||||
int i;
|
||||
for(i = 0; i < 2 * size; i++)
|
||||
r[i] = 1e3 * i / size;
|
||||
return r;
|
||||
}
|
||||
|
||||
void * d_create_matrix(int size) {
|
||||
double * r = malloc(size * sizeof(double));
|
||||
int i;
|
||||
for(i = 0; i < size; i++)
|
||||
r[i] = 1e3 * i / size;
|
||||
return r;
|
||||
}
|
||||
|
||||
void trmv_bench(BenchParam * param)
|
||||
{
|
||||
int i, n;
|
||||
int size = param->matrix_size;
|
||||
n = param->n_loop / size;
|
||||
int one = 1;
|
||||
void * A = param->create_matrix(size * size);
|
||||
void * y = param->create_matrix(size);
|
||||
for(i = 0; i < n; i++) {
|
||||
param->blas_func("U", "N", "N", &size, A, &size, y, &one);
|
||||
}
|
||||
free(A);
|
||||
free(y);
|
||||
}
|
||||
|
||||
void gemv_bench(BenchParam * param)
|
||||
{
|
||||
int i, n;
|
||||
int size = param->matrix_size;
|
||||
n = param->n_loop / size;
|
||||
double v = 1.01;
|
||||
int one = 1;
|
||||
void * A = param->create_matrix(size * size);
|
||||
void * y = param->create_matrix(size);
|
||||
for(i = 0; i < n; i++) {
|
||||
param->blas_func("N", &size, &size, &v, A, &size, y, &one, &v, y, &one);
|
||||
}
|
||||
free(A);
|
||||
free(y);
|
||||
}
|
||||
|
||||
void ger_bench(BenchParam * param) {
|
||||
int i, n;
|
||||
int size = param->matrix_size;
|
||||
n = param->n_loop / size;
|
||||
double v = 1.01;
|
||||
int one = 1;
|
||||
void * A = param->create_matrix(size * size);
|
||||
void * y = param->create_matrix(size);
|
||||
for(i = 0; i < n; i++) {
|
||||
param->blas_func(&size, &size, &v, y, &one, y, &one, A, &size);
|
||||
}
|
||||
free(A);
|
||||
free(y);
|
||||
}
|
||||
|
||||
#ifndef _WIN32
|
||||
void * pthread_func_wrapper(void * param) {
|
||||
((BenchParam *)param)->bench_func(param);
|
||||
pthread_exit(NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define NB_TESTS 5
|
||||
void * TESTS[4 * NB_TESTS] = {
|
||||
trmv_bench, ztrmv_, z_create_matrix, "ztrmv",
|
||||
gemv_bench, dgemv_, d_create_matrix, "dgemv",
|
||||
gemv_bench, zgemv_, z_create_matrix, "zgemv",
|
||||
ger_bench, dger_, d_create_matrix, "dger",
|
||||
ger_bench, zgerc_, z_create_matrix, "zgerc",
|
||||
};
|
||||
|
||||
inline static double delta_time(struct timespec tick) {
|
||||
struct timespec tock;
|
||||
clock_gettime(CLOCK_MONOTONIC, &tock);
|
||||
return (tock.tv_sec - tick.tv_sec) + (tock.tv_nsec - tick.tv_nsec) / 1e9;
|
||||
}
|
||||
|
||||
double pthread_bench(BenchParam * param, int nb_threads)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return 0;
|
||||
#else
|
||||
BenchParam threaded_param = *param;
|
||||
pthread_t threads[nb_threads];
|
||||
int t, rc;
|
||||
struct timespec tick;
|
||||
threaded_param.n_loop /= nb_threads;
|
||||
clock_gettime(CLOCK_MONOTONIC, &tick);
|
||||
for(t=0; t<nb_threads; t++){
|
||||
rc = pthread_create(&threads[t], NULL, pthread_func_wrapper, &threaded_param);
|
||||
if (rc){
|
||||
printf("ERROR; return code from pthread_create() is %d\n", rc);
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
for(t=0; t<nb_threads; t++){
|
||||
pthread_join(threads[t], NULL);
|
||||
}
|
||||
return delta_time(tick);
|
||||
#endif
|
||||
}
|
||||
|
||||
double seq_bench(BenchParam * param) {
|
||||
struct timespec tick;
|
||||
clock_gettime(CLOCK_MONOTONIC, &tick);
|
||||
param->bench_func(param);
|
||||
return delta_time(tick);
|
||||
}
|
||||
|
||||
double omp_bench(BenchParam * param) {
|
||||
BenchParam threaded_param = *param;
|
||||
struct timespec tick;
|
||||
int t;
|
||||
int nb_threads = omp_get_max_threads();
|
||||
threaded_param.n_loop /= nb_threads;
|
||||
clock_gettime(CLOCK_MONOTONIC, &tick);
|
||||
#pragma omp parallel for
|
||||
for(t = 0; t < nb_threads; t ++){
|
||||
param->bench_func(&threaded_param);
|
||||
}
|
||||
return delta_time(tick);
|
||||
}
|
||||
|
||||
int main(int argc, char * argv[]) {
|
||||
double inc_factor = exp(log((double)MAX_SIZE / MIN_SIZE) / NB_SIZE);
|
||||
BenchParam param;
|
||||
int test_id;
|
||||
printf ("Running on %d threads\n", omp_get_max_threads());
|
||||
for(test_id = 0; test_id < NB_TESTS; test_id ++) {
|
||||
double size = MIN_SIZE;
|
||||
param.bench_func = TESTS[test_id * 4];
|
||||
param.blas_func = TESTS[test_id * 4 + 1];
|
||||
param.create_matrix = TESTS[test_id * 4 + 2];
|
||||
printf("\nBenchmark of %s\n", (char*)TESTS[test_id * 4 + 3]);
|
||||
param.n_loop = NLOOP;
|
||||
while(size <= MAX_SIZE) {
|
||||
param.matrix_size = (int)(size + 0.5);
|
||||
double seq_time = seq_bench(¶m);
|
||||
double omp_time = omp_bench(¶m);
|
||||
double pthread_time = pthread_bench(¶m, omp_get_max_threads());
|
||||
printf("matrix size %d, sequential %gs, openmp %gs, speedup %g, "
|
||||
"pthread %gs, speedup %g\n",
|
||||
param.matrix_size, seq_time,
|
||||
omp_time, seq_time / omp_time,
|
||||
pthread_time, seq_time / pthread_time);
|
||||
size *= inc_factor;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
146
benchmark/spmv.c
146
benchmark/spmv.c
@@ -1,146 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef SPMV
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SPMV BLASFUNC(dspmv)
|
||||
#else
|
||||
#define SPMV BLASFUNC(sspmv)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SPMV BLASFUNC(zspmv)
|
||||
#else
|
||||
#define SPMV BLASFUNC(cspmv)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char uplo='L';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m,(int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
|
||||
SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
124
benchmark/spr.c
124
benchmark/spr.c
@@ -1,124 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef SPR
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SPR BLASFUNC(dspr)
|
||||
#else
|
||||
#define SPR BLASFUNC(sspr)
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d\n", from, to, step,uplo,inc_x);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
SPR (&uplo, &m, alpha, c, &inc_x, a);
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
135
benchmark/spr2.c
135
benchmark/spr2.c
@@ -1,135 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
|
||||
#undef SPR2
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SPR2 BLASFUNC(dspr2)
|
||||
#else
|
||||
#define SPR2 BLASFUNC(sspr2)
|
||||
#endif
|
||||
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*b,*c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d Inc_y = %d\n", from, to, step,uplo,inc_x,inc_y);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a);
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -25,7 +25,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SWAP
|
||||
@@ -44,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
@@ -58,6 +128,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -80,7 +151,7 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -104,13 +175,13 @@ int main(int argc, char *argv[]){
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SWAP (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
@@ -119,8 +190,8 @@ int main(int argc, char *argv[]){
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
" %10.2f MBytes\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYMM
|
||||
|
||||
@@ -47,6 +53,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
@@ -66,6 +137,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -90,7 +162,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -103,19 +175,21 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
|
||||
@@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYMV
|
||||
|
||||
@@ -47,6 +53,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
@@ -63,6 +134,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -90,7 +162,7 @@ int main(int argc, char *argv[]){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -105,7 +177,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -120,13 +192,13 @@ int main(int argc, char *argv[]){
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
||||
113
benchmark/syr.c
113
benchmark/syr.c
@@ -1,113 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
|
||||
#undef SYR
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYR BLASFUNC(dsyr)
|
||||
#else
|
||||
#define SYR BLASFUNC(ssyr)
|
||||
#endif
|
||||
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x,*a;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
blasint m, i, j;
|
||||
blasint inc_x= 1;
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d\n", from, to, step,uplo,inc_x);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
SYR (&uplo, &m, alpha, x, &inc_x, a, &m );
|
||||
|
||||
end();
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
127
benchmark/syr2.c
127
benchmark/syr2.c
@@ -1,127 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
#undef SYR2
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define SYR2 BLASFUNC(dsyr2)
|
||||
#else
|
||||
#define SYR2 BLASFUNC(ssyr2)
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y, *a;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
char *p;
|
||||
|
||||
char uplo='U';
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
|
||||
blasint m, i, j, l;
|
||||
blasint inc_x= 1;
|
||||
blasint inc_y= 1;
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
int loops = 1;
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d Inc_y = %d\n", from, to, step,uplo,inc_x,inc_y);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
timeg = 0.;
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
for (l = 0; l < loops; l++) {
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
|
||||
SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m );
|
||||
|
||||
end();
|
||||
|
||||
timeg += getsec();
|
||||
} // loops
|
||||
|
||||
time1 = timeg/(double)loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -25,7 +25,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYR2K
|
||||
@@ -48,6 +53,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
@@ -67,6 +137,7 @@ int main(int argc, char *argv[]){
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
@@ -91,7 +162,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -104,19 +175,21 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
|
||||
108
benchmark/syrk.c
108
benchmark/syrk.c
@@ -1,5 +1,5 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, 2023 The OpenBLAS Project
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYRK
|
||||
|
||||
@@ -47,6 +53,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *c;
|
||||
@@ -56,20 +127,18 @@ int main(int argc, char *argv[]){
|
||||
|
||||
char uplo='U';
|
||||
char trans='N';
|
||||
|
||||
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
blasint m, i, j, l;
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
int loops = 1;
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
|
||||
|
||||
double time1,timeg;
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
@@ -77,7 +146,7 @@ int main(int argc, char *argv[]){
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c Loops = %d\n", from, to, step,uplo,trans,loops);
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
|
||||
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
@@ -90,7 +159,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
@@ -98,29 +167,26 @@ int main(int argc, char *argv[]){
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
timeg = 0.;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for(l = 0; l < loops; l++) {
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
timeg += getsec();
|
||||
|
||||
} //loops
|
||||
time1 = timeg / (double)loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user