Compare commits

..

No commits in common. "develop" and "issue2419" have entirely different histories.

9981 changed files with 140899 additions and 2718683 deletions

View File

@ -1,174 +0,0 @@
macos_instance:
image: ghcr.io/cirruslabs/macos-monterey-xcode:latest
#task:
# name: AppleM1/LLVM
# compile_script:
# - brew install llvm
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
# - make TARGET=VORTEX USE_OPENMP=1 CC=clang
#task:
# name: AppleM1/LLVM/ILP64
# compile_script:
# - brew install llvm
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
# - make TARGET=VORTEX USE_OPENMP=1 CC=clang INTERFACE64=1
#task:
# name: AppleM1/LLVM/CMAKE
# compile_script:
# - brew install llvm
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
# - mkdir build
# - cd build
# - cmake -DTARGET=VORTEX -DCMAKE_C_COMPILER=clang -DBUILD_SHARED_LIBS=ON ..
# - make -j 4
#task:
# name: AppleM1/GCC/MAKE/OPENMP
# compile_script:
# - brew install gcc@11
# - export PATH=/opt/homebrew/bin:$PATH
# - export LDFLAGS="-L/opt/homebrew/lib"
# - export CPPFLAGS="-I/opt/homebrew/include"
# - make CC=gcc-11 FC=gfortran-11 USE_OPENMP=1
macos_instance:
image: ghcr.io/cirruslabs/macos-sonoma-xcode:latest
# Cross-compiles OpenBLAS for x86_64 macOS using the clang shipped with Xcode
# (CFLAGS select -arch x86_64 and the MacOSX SDK as sysroot).
task:
  name: AppleM1/LLVM x86_64 xbuild
  compile_script:
    # Homebrew LLVM setup, intentionally disabled. These are kept as real
    # comments: written as "- #..." the first entry parsed as an empty list
    # item, and each "- export #..." entry parsed as a bare `export`, which
    # only dumped the whole environment into the build log.
    # - brew install llvm
    # - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
    # - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
    # - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
    # Xcode-style architecture variables, all restricted to i386/x86_64.
    - export ARCHS="i386 x86_64"
    - export ARCHS_STANDARD="i386 x86_64"
    - export ARCHS_STANDARD_32_64_BIT="i386 x86_64"
    - export ARCHS_STANDARD_64_BIT=x86_64
    - export ARCHS_STANDARD_INCLUDING_64_BIT="i386 x86_64"
    - export ARCHS_UNIVERSAL_IPHONE_OS="i386 x86_64"
    - export VALID_ARCHS="i386 x86_64"
    # Log the SDK path and Xcode version found on the image.
    - xcrun --sdk macosx --show-sdk-path
    - xcodebuild -version
    # The compiler and SDK paths below are pinned to Xcode 15.4 / MacOSX14.5.
    - export CC=/Applications/Xcode_15.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
    - export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode_15.4.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.5.sdk -arch x86_64"
    # NOTE(review): RANLIB is replaced by "ls -l", presumably to skip the real
    # ranlib during the cross-build - confirm before changing.
    - make TARGET=CORE2 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l"
  always:
    # Publish every file matching *conf* as a plain-text artifact.
    config_artifacts:
      path: "*conf*"
      type: text/plain
    # lib_artifacts:
    #   path: "libopenblas*"
    #   type: application/octet-stream
macos_instance:
image: ghcr.io/cirruslabs/macos-sonoma-xcode:latest
# Cross-compiles OpenBLAS for arm64 iOS using the clang shipped with Xcode
# (CFLAGS select -arch arm64, the iPhoneOS SDK as sysroot and a minimum
# iOS version of 10.0).
task:
  name: AppleM1/LLVM armv8-ios xbuild
  compile_script:
    # Homebrew LLVM setup, intentionally disabled. These are kept as real
    # comments: written as "- #..." the first entry parsed as an empty list
    # item, and each "- export #..." entry parsed as a bare `export`, which
    # only dumped the whole environment into the build log.
    # - brew install llvm
    # - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
    # - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
    # - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
    # The compiler and SDK paths below are pinned to Xcode 15.4 / iPhoneOS17.5.
    - export CC=/Applications/Xcode_15.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
    - export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode_15.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS17.5.sdk -arch arm64 -miphoneos-version-min=10.0"
    # Diagnostics: log the iOS SDK path and the contents of /Applications
    # (where the Xcode_*.app bundle referenced above lives).
    - xcrun --sdk iphoneos --show-sdk-path
    - ls -l /Applications
    - make TARGET=ARMV8 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 CROSS=1
  always:
    # Publish every file matching *conf* as a plain-text artifact.
    config_artifacts:
      path: "*conf*"
      type: text/plain
macos_instance:
image: ghcr.io/cirruslabs/macos-sonoma-xcode:latest
task:
name: AppleM1/LLVM armv7-androidndk xbuild
compile_script:
- brew install --cask android-ndk
- export ANDROID_NDK_HOME="/opt/homebrew/share/android-ndk"
- export CC=/opt/homebrew/share/android-ndk/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi23-clang
- make TARGET=ARMV7 ARM_SOFTFP_ABI=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l"
always:
config_artifacts:
path: "*conf*"
type: text/plain
task:
name: NeoverseN1
arm_container:
image: node:latest
compile_script:
- make
task:
name: NeoverseN1-ILP64
arm_container:
image: node:latest
compile_script:
- make INTERFACE64=1
task:
name: NeoverseN1-OMP
arm_container:
image: node:latest
cpu: 8
compile_script:
- make USE_OPENMP=1
FreeBSD_task:
name: FreeBSD-gcc12
freebsd_instance:
image_family: freebsd-13-3
install_script:
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
compile_script:
- ls -l /usr/local/lib
- gmake CC=gcc
FreeBSD_task:
name: freebsd-gcc12-ilp64
freebsd_instance:
image_family: freebsd-13-3
install_script:
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
compile_script:
- ls -l /usr/local/lib
- gmake CC=gcc INTERFACE64=1
FreeBSD_task:
name: FreeBSD-clang-openmp
freebsd_instance:
image_family: freebsd-13-3
install_script:
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
- ln -s /usr/local/lib/gcc13/libgfortran.so.5.0.0 /usr/lib/libgfortran.so
compile_script:
- gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1
#task:
# name: Windows/LLVM16 --- too slow ---
# windows_container:
# image: cirrusci/windowsservercore:cmake-2021.12.07
# install_script:
# - choco list --localonly
# - choco install -y llvm
# - # choco install -y cmake --installargs '"ADD_CMAKE_TO_PATH=System"'
# - choco install -y ninja
# - refreshenv
# - cd "c:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Auxiliary/Build"
# - vcvarsall x64
# - cd "C:\Users\ContainerAdministrator\AppData\Local\Temp\cirrus-ci-build"
# - cmake -S . -B build -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release
# - cd build
# - cmake --build .
# - ctest

View File

@ -1,16 +0,0 @@
# Self-Hosted Github Action Runners on AWS via Cirun.io
# Reference: https://docs.cirun.io/reference/yaml
runners:
- name: "aws-runner-graviton"
# Cloud Provider: AWS
cloud: "aws"
region: "us-east-1"
# Cheapest VM on AWS
instance_type: "c7g.large"
# Ubuntu-22.04, ami image
machine_image: "ami-0a0c8eebcdd6dcbd0"
preemptible: false
# Add this label in the "runs-on" param in .github/workflows/<workflow-name>.yml
# So that this runner is created for running the workflow
labels:
- "cirun-aws-runner-graviton"

View File

@ -8,7 +8,7 @@ platform:
steps:
- name: Build and Test
image: ubuntu:18.04
image: ubuntu:19.04
environment:
CC: gcc
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32'
@ -32,7 +32,7 @@ platform:
steps:
- name: Build and Test
image: ubuntu:18.04
image: ubuntu:19.04
environment:
CC: gcc
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV6 NUM_THREADS=32'
@ -141,76 +141,3 @@ steps:
- cmake $CMAKE_FLAGS ..
- make -j
- ctest -V
---
kind: pipeline
name: arm64_native_test
platform:
os: linux
arch: arm64
steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'USE_OPENMP=1'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran perl python g++
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C test $COMMON_FLAGS
- make -C ctest $COMMON_FLAGS
- make -C utest $COMMON_FLAGS
- make -C cpp_thread_test dgemm_tester
---
kind: pipeline
name: epyc_native_test
platform:
os: linux
arch: amd64
steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'USE_OPENMP=1'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran perl python g++
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C test $COMMON_FLAGS
- make -C ctest $COMMON_FLAGS
- make -C utest $COMMON_FLAGS
- make -C cpp_thread_test dgemm_tester
---
kind: pipeline
name: arm64_gcc10
platform:
os: linux
arch: arm64
steps:
- name: Build and Test
image: ubuntu:20.04
environment:
CC: gcc-10
FC: gfortran-10
COMMON_FLAGS: 'TARGET=ARMV8 DYNAMIC_ARCH=1'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran-10 perl python g++
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C utest $COMMON_FLAGS
- make -C test $COMMON_FLAGS

View File

@ -1,149 +0,0 @@
name: apple m
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: macos-14
strategy:
fail-fast: false
matrix:
build: [cmake, make]
fortran: [gfortran]
openmp: [0, 1]
ilp64: [0, 1]
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Print system information
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
cat /proc/cpuinfo
elif [ "$RUNNER_OS" == "macOS" ]; then
sysctl -a | grep machdep.cpu
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt-get install -y gfortran cmake ccache libtinfo5
elif [ "$RUNNER_OS" == "macOS" ]; then
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
brew reinstall gcc
brew install coreutils cmake ccache
brew install llvm
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
# GNU make and cmake call the compilers differently. It looks like
# that causes the cache to mismatch. Keep the ccache for both build
# tools separate to avoid polluting each other.
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }}
ccache-${{ runner.os }}-${{ matrix.build }}
- name: Configure ccache
run: |
if [ "${{ matrix.build }}" = "make" ]; then
# Add ccache to path
if [ "$RUNNER_OS" = "Linux" ]; then
echo "/usr/lib/ccache" >> $GITHUB_PATH
elif [ "$RUNNER_OS" = "macOS" ]; then
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
echo "/opt/homebrew/opt/llvm/bin" >>$GITHUB_PATH
echo "" >>$GITHUB_PATH
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
fi
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Build OpenBLAS
run: |
export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
export CC="/opt/homebrew/opt/llvm/bin/clang"
case "${{ matrix.build }}" in
"make")
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=${{matrix.openmp}} INTERFACE64=${{matrix.ilp64}} FC="ccache ${{ matrix.fortran }}"
;;
"cmake")
export LDFLAGS="$LDFLAGS -Wl,-ld_classic"
mkdir build && cd build
cmake -DDYNAMIC_ARCH=1 \
-DUSE_OPENMP=${{matrix.openmp}} \
-DINTERFACE64=${{matrix.ilp64}} \
-DNOFORTRAN=0 \
-DBUILD_WITHOUT_LAPACK=0 \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
cmake --build .
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac
- name: Show ccache status
continue-on-error: true
run: ccache -s
# Runs the test suites for the selected build system: the test/, ctest/ and
# utest/ make targets for "make" builds, or ctest inside build/ for "cmake".
- name: Run tests
  timeout-minutes: 60
  run: |
    case "${{ matrix.build }}" in
      "make")
        # Use the same OpenMP / ILP64 settings as the "Build OpenBLAS" step so
        # the test makefiles see the configuration the library was built with
        # (previously hard-coded to USE_OPENMP=0 with no INTERFACE64).
        MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=${{matrix.openmp}} INTERFACE64=${{matrix.ilp64}}'
        echo "::group::Tests in 'test' directory"
        make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
        echo "::endgroup::"
        echo "::group::Tests in 'ctest' directory"
        make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
        echo "::endgroup::"
        echo "::group::Tests in 'utest' directory"
        make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
        echo "::endgroup::"
        ;;
      "cmake")
        cd build && ctest
        ;;
      *)
        # Any other matrix value is a workflow configuration error.
        echo "::error::Configuration not supported"
        exit 1
        ;;
    esac

View File

@ -1,139 +0,0 @@
name: arm64 graviton cirun
on:
push:
branches:
- develop
- release-**
pull_request:
branches:
- develop
- release-**
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}"
strategy:
fail-fast: false
matrix:
fortran: [gfortran]
build: [cmake, make]
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Print system information
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
cat /proc/cpuinfo
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt update
sudo apt-get install -y gfortran cmake ccache libtinfo5
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
# GNU make and cmake call the compilers differently. It looks like
# that causes the cache to mismatch. Keep the ccache for both build
# tools separate to avoid polluting each other.
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
ccache-${{ runner.os }}-${{ matrix.build }}
- name: Configure ccache
run: |
if [ "${{ matrix.build }}" = "make" ]; then
# Add ccache to path
if [ "$RUNNER_OS" = "Linux" ]; then
echo "/usr/lib/ccache" >> $GITHUB_PATH
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
fi
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Build OpenBLAS
run: |
case "${{ matrix.build }}" in
"make")
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
;;
"cmake")
mkdir build && cd build
cmake -DDYNAMIC_ARCH=1 \
-DNOFORTRAN=0 \
-DBUILD_WITHOUT_LAPACK=0 \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
cmake --build .
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac
- name: Show ccache status
continue-on-error: true
run: ccache -s
- name: Run tests
timeout-minutes: 60
run: |
case "${{ matrix.build }}" in
"make")
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
echo "::group::Tests in 'test' directory"
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'ctest' directory"
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'utest' directory"
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
;;
"cmake")
cd build && ctest
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac

View File

@ -1,127 +0,0 @@
name: c910v qemu test
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
TEST:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
env:
xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1698113812618
toolchain_file_name: Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0-20231018.tar.gz
strategy:
fail-fast: false
matrix:
include:
- target: RISCV64_GENERIC
triple: riscv64-linux-gnu
apt_triple: riscv64-linux-gnu
opts: NO_SHARED=1 TARGET=RISCV64_GENERIC
- target: C910V
triple: riscv64-unknown-linux-gnu
apt_triple: riscv64-linux-gnu
opts: NO_SHARED=1 TARGET=C910V
steps:
- name: Checkout repository
uses: actions/checkout@v3
# Installs the host build tools plus the RISC-V cross GCC/gfortran selected by
# matrix.apt_triple and the cross libgomp runtime.
- name: install build deps
  run: |
    sudo apt-get update
    # -y keeps the install non-interactive regardless of how the runner image
    # configures apt, matching the other workflows in this repository.
    sudo apt-get install -y autoconf automake autotools-dev ninja-build make ccache \
      gcc-${{ matrix.apt_triple }} gfortran-${{ matrix.apt_triple }} libgomp1-riscv64-cross
- name: checkout qemu
uses: actions/checkout@v3
with:
repository: T-head-Semi/qemu
path: qemu
ref: 1e692ebb43d396c52352406323fc782c1ac99a42
- name: build qemu
run: |
# Force use c910v qemu-user
wget https://github.com/revyos/qemu/commit/5164bca5a4bcde4534dc1a9aa3a7f619719874cf.patch
cd qemu
patch -p1 < ../5164bca5a4bcde4534dc1a9aa3a7f619719874cf.patch
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=riscv64-linux-user --disable-system
make -j$(nproc)
make install
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.target }}
- name: Configure ccache
run: |
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: build OpenBLAS
run: |
wget ${xuetie_toolchain}/${toolchain_file_name}
tar -xvf ${toolchain_file_name} -C /opt
export PATH="/opt/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0/bin:$PATH"
make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)
- name: test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
qemu-riscv64 ./utest/openblas_utest
qemu-riscv64 ./utest/openblas_utest_ext
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat1
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat

View File

@ -1,157 +0,0 @@
name: Run codspeed benchmarks
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
benchmarks:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
fortran: [gfortran]
build: [make]
pyver: ["3.12"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
with:
python-version: ${{ matrix.pyver }}
- name: Print system information
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
cat /proc/cpuinfo
fi
- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt-get update
sudo apt-get install -y gfortran cmake ccache libtinfo5
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
# GNU make and cmake call the compilers differently. It looks like
# that causes the cache to mismatch. Keep the ccache for both build
# tools separate to avoid polluting each other.
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
ccache-${{ runner.os }}-${{ matrix.build }}
# Generates benchmark/pybench/openblas.pc, a pkg-config file that points at the
# library built in the workspace root, then prints it to the log.
- name: Write out the .pc
  run: |
    cd benchmark/pybench
    # The heredoc delimiter is quoted so the shell leaves pkg-config's own
    # ${version} reference untouched (unquoted, it expanded to an empty
    # string). The ${{ ... }} expressions are substituted by GitHub Actions
    # before the script runs, so they are unaffected by the quoting.
    cat > openblas.pc << 'EOF'
    libdir=${{ github.workspace }}
    includedir= ${{ github.workspace }}
    openblas_config= OpenBLAS 0.3.27 DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=64
    version=0.0.99
    extralib=-lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
    Name: openblas
    Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
    Version: ${version}
    URL: https://github.com/xianyi/OpenBLAS
    Libs: ${{ github.workspace }}/libopenblas.so -Wl,-rpath,${{ github.workspace }}
    Libs.private: -lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
    Cflags: -I${{ github.workspace}}
    EOF
    cat openblas.pc
- name: Configure ccache
run: |
if [ "${{ matrix.build }}" = "make" ]; then
# Add ccache to path
if [ "$RUNNER_OS" = "Linux" ]; then
echo "/usr/lib/ccache" >> $GITHUB_PATH
elif [ "$RUNNER_OS" = "macOS" ]; then
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
fi
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Build OpenBLAS
run: |
case "${{ matrix.build }}" in
"make")
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
;;
"cmake")
mkdir build && cd build
cmake -DDYNAMIC_ARCH=1 \
-DNOFORTRAN=0 \
-DBUILD_WITHOUT_LAPACK=0 \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
cmake --build .
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac
- name: Show ccache status
continue-on-error: true
run: ccache -s
- name: Install benchmark dependencies
run: pip install meson ninja numpy pytest pytest-codspeed --user
- name: Build the wrapper
run: |
cd benchmark/pybench
export PKG_CONFIG_PATH=$PWD
meson setup build --prefix=$PWD/build-install
meson install -C build
#
# sanity check
cd build/openblas_wrap
python -c'import _flapack; print(dir(_flapack))'
- name: Run benchmarks under pytest-benchmark
run: |
cd benchmark/pybench
pip install pytest-benchmark
export PYTHONPATH=$PWD/build-install/lib/python${{matrix.pyver}}/site-packages/
OPENBLAS_NUM_THREADS=1 pytest benchmarks/bench_blas.py -k 'gesdd'
- name: Run benchmarks
uses: CodSpeedHQ/action@v2
with:
token: ${{ secrets.CODSPEED_TOKEN }}
run: |
cd benchmark/pybench
export PYTHONPATH=$PWD/build-install/lib/python${{matrix.pyver}}/site-packages/
OPENBLAS_NUM_THREADS=1 pytest benchmarks/bench_blas.py --codspeed

View File

@ -1,40 +0,0 @@
name: Publish docs via GitHub Pages
on:
push:
branches:
- develop
pull_request:
branches:
- develop
jobs:
build:
name: Deploy docs
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- uses: actions/setup-python@v5
with:
python-version: "3.10"
- name: Install MkDocs and doc theme packages
run: pip install mkdocs mkdocs-material mkdocs-git-revision-date-localized-plugin
- name: Build docs site
run: mkdocs build
# mkdocs gh-deploy command only builds to the top-level, hence deploying
# with this action instead.
# Deploys to http://www.openmathlib.org/OpenBLAS/docs/
- name: Deploy docs
uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0
if: ${{ github.ref == 'refs/heads/develop' }}
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./site
destination_dir: docs/

View File

@ -1,371 +0,0 @@
name: continuous build
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
fortran: [gfortran, flang]
build: [cmake, make]
exclude:
- os: macos-latest
fortran: flang
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Print system information
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
cat /proc/cpuinfo
elif [ "$RUNNER_OS" == "macOS" ]; then
sysctl -a | grep machdep.cpu
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt-get update
sudo apt-get install -y gfortran cmake ccache libtinfo5
elif [ "$RUNNER_OS" == "macOS" ]; then
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
brew reinstall gcc
brew install coreutils cmake ccache
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
# GNU make and cmake call the compilers differently. It looks like
# that causes the cache to mismatch. Keep the ccache for both build
# tools separate to avoid polluting each other.
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
ccache-${{ runner.os }}-${{ matrix.build }}
- name: Configure ccache
run: |
if [ "${{ matrix.build }}" = "make" ]; then
# Add ccache to path
if [ "$RUNNER_OS" = "Linux" ]; then
echo "/usr/lib/ccache" >> $GITHUB_PATH
elif [ "$RUNNER_OS" = "macOS" ]; then
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
fi
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Build OpenBLAS
run: |
if [ "${{ matrix.fortran }}" = "flang" ]; then
# download and install classic flang
cd /usr/
sudo wget -nv https://github.com/flang-compiler/flang/releases/download/flang_20190329/flang-20190329-x86-70.tgz
sudo tar xf flang-20190329-x86-70.tgz
sudo rm flang-20190329-x86-70.tgz
cd -
fi
case "${{ matrix.build }}" in
"make")
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
;;
"cmake")
mkdir build && cd build
cmake -DDYNAMIC_ARCH=1 \
-DNOFORTRAN=0 \
-DBUILD_WITHOUT_LAPACK=0 \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
cmake --build .
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac
- name: Show ccache status
continue-on-error: true
run: ccache -s
- name: Run tests
timeout-minutes: 60
run: |
case "${{ matrix.build }}" in
"make")
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
echo "::group::Tests in 'test' directory"
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'ctest' directory"
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'utest' directory"
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
;;
"cmake")
cd build && ctest
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac
msys2:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: windows-latest
strategy:
fail-fast: false
matrix:
msystem: [UCRT64, MINGW32, CLANG64, CLANG32]
idx: [int32, int64]
build-type: [Release]
include:
- msystem: UCRT64
idx: int32
target-prefix: mingw-w64-ucrt-x86_64
fc-pkg: fc
- msystem: MINGW32
idx: int32
target-prefix: mingw-w64-i686
fc-pkg: fc
- msystem: CLANG64
idx: int32
target-prefix: mingw-w64-clang-x86_64
fc-pkg: fc
# Compiling with Flang 16 seems to cause test errors on machines
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
no-avx512-flags: -DNO_AVX512=1
- msystem: CLANG32
idx: int32
target-prefix: mingw-w64-clang-i686
fc-pkg: cc
c-lapack-flags: -DC_LAPACK=ON
- msystem: UCRT64
idx: int64
idx64-flags: -DBINARY=64 -DINTERFACE64=1
target-prefix: mingw-w64-ucrt-x86_64
fc-pkg: fc
- msystem: CLANG64
idx: int64
idx64-flags: -DBINARY=64 -DINTERFACE64=1
target-prefix: mingw-w64-clang-x86_64
fc-pkg: fc
# Compiling with Flang 16 seems to cause test errors on machines
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
no-avx512-flags: -DNO_AVX512=1
- msystem: UCRT64
idx: int32
target-prefix: mingw-w64-ucrt-x86_64
fc-pkg: fc
build-type: None
exclude:
- msystem: MINGW32
idx: int64
- msystem: CLANG32
idx: int64
defaults:
run:
# Use MSYS2 bash as default shell
shell: msys2 {0}
env:
CHERE_INVOKING: 1
steps:
# Logs the processor model of the Windows runner. The step overrides the job's
# default MSYS2 shell with PowerShell; it is listed before the step that
# installs MSYS2.
- name: Get CPU name
  shell: pwsh
  run: |
    Get-CIMInstance -Class Win32_Processor | Select-Object -Property Name
- name: Install build dependencies
uses: msys2/setup-msys2@v2
with:
msystem: ${{ matrix.msystem }}
update: true
release: false # Use pre-installed version
install: >-
base-devel
${{ matrix.target-prefix }}-cc
${{ matrix.target-prefix }}-${{ matrix.fc-pkg }}
${{ matrix.target-prefix }}-cmake
${{ matrix.target-prefix }}-ninja
${{ matrix.target-prefix }}-ccache
- name: Checkout repository
uses: actions/checkout@v3
- name: Prepare ccache
# Get cache location of ccache
# Create key that is used in action/cache/restore and action/cache/save steps
id: ccache-prepare
run: |
echo "ccachedir=$(cygpath -m $(ccache -k cache_dir))" >> $GITHUB_OUTPUT
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
echo "key=ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}-${{ github.sha }}" >> $GITHUB_OUTPUT
- name: Restore ccache
uses: actions/cache/restore@v3
with:
path: ${{ steps.ccache-prepare.outputs.ccachedir }}
key: ${{ steps.ccache-prepare.outputs.key }}
# Restore a matching ccache cache entry. Prefer same branch.
restore-keys: |
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}
- name: Configure ccache
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota.
run: |
which ccache
test -d ${{ steps.ccache-prepare.outputs.ccachedir }} || mkdir -p ${{ steps.ccache-prepare.outputs.ccachedir }}
echo "max_size = 250M" > ${{ steps.ccache-prepare.outputs.ccachedir }}/ccache.conf
echo "compression = true" >> ${{ steps.ccache-prepare.outputs.ccachedir }}/ccache.conf
ccache -p
ccache -s
echo $HOME
cygpath -w $HOME
- name: Configure OpenBLAS
run: |
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
-DBUILD_SHARED_LIBS=ON \
-DBUILD_STATIC_LIBS=ON \
-DDYNAMIC_ARCH=ON \
-DUSE_THREAD=ON \
-DNUM_THREADS=64 \
-DTARGET=CORE2 \
${{ matrix.idx64-flags }} \
${{ matrix.c-lapack-flags }} \
${{ matrix.no-avx512-flags }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
- name: Build OpenBLAS
run: cd build && cmake --build .
- name: Show ccache status
continue-on-error: true
run: ccache -s
- name: Save ccache
# Save the cache after we are done (successfully) building
uses: actions/cache/save@v3
with:
path: ${{ steps.ccache-prepare.outputs.ccachedir }}
key: ${{ steps.ccache-prepare.outputs.key }}
- name: Run tests
id: run-ctest
timeout-minutes: 60
run: cd build && ctest
- name: Re-run tests
if: always() && (steps.run-ctest.outcome == 'failure')
timeout-minutes: 60
run: |
cd build
echo "::group::Re-run ctest"
ctest --rerun-failed --output-on-failure || true
echo "::endgroup::"
echo "::group::Log from these tests"
[ ! -f Testing/Temporary/LastTest.log ] || cat Testing/Temporary/LastTest.log
echo "::endgroup::"
cross_build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
include:
- target: mips64el
triple: mips64el-linux-gnuabi64
opts: DYNAMIC_ARCH=1 TARGET=GENERIC
- target: riscv64
triple: riscv64-linux-gnu
opts: TARGET=RISCV64_GENERIC
- target: mipsel
triple: mipsel-linux-gnu
opts: TARGET=MIPS1004K
- target: alpha
triple: alpha-linux-gnu
opts: TARGET=EV4
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Install Dependencies
run: |
sudo apt-get update
sudo apt-get install -y ccache gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-${{ matrix.target }}-cross
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.target }}
- name: Configure ccache
run: |
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Build OpenBLAS
run: |
make -j$(nproc) HOSTCC="ccache gcc" CC="ccache ${{ matrix.triple }}-gcc" FC="ccache ${{ matrix.triple }}-gfortran" ARCH=${{ matrix.target }} ${{ matrix.opts }}

View File

@ -1,119 +0,0 @@
name: loongarch64 qemu test
on: [push, pull_request]
# One concurrency group per workflow and PR head branch: a newer run cancels an
# older one in the same group. When head_ref is empty the run id is used, so
# each such run gets a group of its own.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
# Restrict the workflow token to read-only repository contents, matching the
# other qemu test workflows.
permissions:
  contents: read # to fetch code (actions/checkout)
jobs:
  # Cross-build a static OpenBLAS with the gcc-14 loongarch64 cross toolchain
  # from apt, then run the utest/ctest/test binaries under qemu-loongarch64-static.
  TEST:
    if: "github.repository == 'OpenMathLib/OpenBLAS'"
    runs-on: ubuntu-24.04
    strategy:
      fail-fast: false
      matrix:
        # target: label used in the ccache key
        # triple: prefix of the cross compiler binaries
        # opts:   make variables selecting the OpenBLAS target
        include:
          - target: LOONGSONGENERIC
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSONGENERIC
          - target: LOONGSON3R5
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
          - target: LOONGSON2K1000
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
          - target: LA64_GENERIC
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
          - target: LA464
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
          - target: LA264
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
          - target: DYNAMIC_ARCH
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # -y keeps the install non-interactive regardless of the image's apt
      # configuration (same as the cross_build job).
      - name: Install APT deps
        run: |
          sudo apt-get update && \
          sudo apt-get install -y autoconf automake autotools-dev ninja-build make ccache qemu-user-static \
          gcc-14-loongarch64-linux-gnu g++-14-loongarch64-linux-gnu gfortran-14-loongarch64-linux-gnu

      - name: Compilation cache
        uses: actions/cache@v3
        with:
          path: ~/.ccache
          key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
          restore-keys: |
            ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
            ccache-${{ runner.os }}-${{ matrix.target }}

      - name: Configure ccache
        run: |
          test -d ~/.ccache || mkdir -p ~/.ccache
          echo "max_size = 300M" > ~/.ccache/ccache.conf
          echo "compression = true" >> ~/.ccache/ccache.conf
          ccache -s

      # Truncates utest/test_dsdot.c to an empty file before the build.
      - name: Disable utest dsdot:dsdot_n_1
        run: |
          echo -n > utest/test_dsdot.c
          echo "Due to the current version of qemu causing utest cases to fail,"
          echo "the utest dsdot:dsdot_n_1 have been temporarily disabled."

      - name: Build OpenBLAS
        run: |
          make CC='ccache ${{ matrix.triple }}-gcc-14 -static' FC='ccache ${{ matrix.triple }}-gfortran-14 -static' \
          RANLIB='ccache ${{ matrix.triple }}-gcc-ranlib-14' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)

      # The loops run exactly the commands that used to be listed one by one,
      # in the same order: precisions s, d, c, z within each group.
      - name: Test
        run: |
          QEMU=qemu-loongarch64-static
          $QEMU ./utest/openblas_utest
          $QEMU ./utest/openblas_utest_ext
          # CBLAS tests: level 1 reads no input, levels 2 and 3 read ./ctest/<p>in<level>
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=2 $QEMU ./ctest/x${p}cblat1
          done
          for lvl in 2 3; do
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=2 $QEMU ./ctest/x${p}cblat${lvl} < ./ctest/${p}in${lvl}
            done
          done
          # BLAS level 1 tests: one thread, then two threads
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 $QEMU ./test/${p}blat1
          done
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=2 $QEMU ./test/${p}blat1
          done
          # BLAS level 2 and 3 tests: remove the old summary files before each pass
          for lvl in 2 3; do
            rm -f ./test/?BLAT${lvl}.SUMM
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 $QEMU ./test/${p}blat${lvl} < ./test/${p}blat${lvl}.dat
            done
            rm -f ./test/?BLAT${lvl}.SUMM
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=2 $QEMU ./test/${p}blat${lvl} < ./test/${p}blat${lvl}.dat
            done
          done

View File

@ -1,141 +0,0 @@
name: loongarch64 clang qemu test
on: [push, pull_request]
# One concurrency group per workflow and PR head branch: a newer run cancels an
# older one in the same group. When head_ref is empty the run id is used, so
# each such run gets a group of its own.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
# Restrict the workflow token to read-only repository contents, matching the
# other qemu test workflows.
permissions:
  contents: read # to fetch code (actions/checkout)
jobs:
  # Cross-build a static OpenBLAS with a downloaded clang 8 / GNU 8.3 loongarch64
  # toolchain, build qemu user-mode emulation from source, and run the
  # utest/ctest/test binaries under qemu-loongarch64.
  TEST:
    if: "github.repository == 'OpenMathLib/OpenBLAS'"
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # target: label used in the ccache key; opts: make variables for the build
        include:
          - target: LOONGSONGENERIC
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSONGENERIC
          - target: LOONGSON3R5
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
          - target: LOONGSON2K1000
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
          - target: LA64_GENERIC
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
          - target: LA464
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
          - target: LA264
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
          - target: DYNAMIC_ARCH
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # NOTE(review): this package is fetched over plain http and installed
      # with sudo without any checksum verification - consider an https source
      # and a pinned hash.
      - name: Install libffi6
        run: |
          wget http://ftp.ca.debian.org/debian/pool/main/libf/libffi/libffi6_3.2.1-9_amd64.deb
          sudo dpkg -i libffi6_3.2.1-9_amd64.deb

      # -y keeps the install non-interactive regardless of the image's apt
      # configuration (same as the cross_build job).
      - name: Install APT deps
        run: |
          sudo apt-get update
          sudo apt-get install -y autoconf automake autotools-dev ninja-build make ccache

      - name: Download and install loongarch64-toolchain
        run: |
          wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz
          wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz
          tar -xf clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz -C /opt
          tar -xf loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz -C /opt

      # NOTE(review): qemu is built from the moving `master` branch, whereas the
      # mips64 workflow pins a commit - consider pinning here for reproducibility.
      - name: Checkout qemu
        uses: actions/checkout@v3
        with:
          repository: qemu/qemu
          path: qemu
          ref: master

      - name: Install qemu
        run: |
          cd qemu
          ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static
          make -j$(nproc)
          make install

      # Puts the workspace and both toolchain bin directories on PATH for all
      # following steps.
      - name: Set env
        run: |
          echo "PATH=$GITHUB_WORKSPACE:/opt/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10/bin:/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/bin:$PATH" >> $GITHUB_ENV

      - name: Compilation cache
        uses: actions/cache@v3
        with:
          path: ~/.ccache
          key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
          restore-keys: |
            ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
            ccache-${{ runner.os }}-${{ matrix.target }}

      - name: Configure ccache
        run: |
          test -d ~/.ccache || mkdir -p ~/.ccache
          echo "max_size = 300M" > ~/.ccache/ccache.conf
          echo "compression = true" >> ~/.ccache/ccache.conf
          ccache -s

      # Truncates utest/test_dsdot.c to an empty file before the build.
      - name: Disable utest dsdot:dsdot_n_1
        run: |
          echo -n > utest/test_dsdot.c
          echo "Due to the qemu versions 7.2 causing utest cases to fail,"
          echo "the utest dsdot:dsdot_n_1 have been temporarily disabled."

      - name: Build OpenBLAS
        run: make CC='ccache clang --target=loongarch64-linux-gnu --sysroot=/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/loongarch64-linux-gnu/sysroot/ -static' FC='ccache loongarch64-linux-gnu-gfortran -static' HOSTCC='ccache clang' CROSS_SUFFIX=llvm- NO_SHARED=1 ${{ matrix.opts }} -j$(nproc)

      # The loops run exactly the commands that used to be listed one by one,
      # in the same order: precisions s, d, c, z within each group.
      - name: Test
        run: |
          export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
          QEMU=qemu-loongarch64
          $QEMU ./utest/openblas_utest
          $QEMU ./utest/openblas_utest_ext
          # CBLAS tests: level 1 reads no input, levels 2 and 3 read ./ctest/<p>in<level>
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=2 $QEMU ./ctest/x${p}cblat1
          done
          for lvl in 2 3; do
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=2 $QEMU ./ctest/x${p}cblat${lvl} < ./ctest/${p}in${lvl}
            done
          done
          # BLAS level 1 tests: one thread, then two threads
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 $QEMU ./test/${p}blat1
          done
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=2 $QEMU ./test/${p}blat1
          done
          # BLAS level 2 and 3 tests: remove the old summary files before each pass
          for lvl in 2 3; do
            rm -f ./test/?BLAT${lvl}.SUMM
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 $QEMU ./test/${p}blat${lvl} < ./test/${p}blat${lvl}.dat
            done
            rm -f ./test/?BLAT${lvl}.SUMM
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=2 $QEMU ./test/${p}blat${lvl} < ./test/${p}blat${lvl}.dat
            done
          done

View File

@ -1,123 +0,0 @@
name: mips64 qemu test
on: [push, pull_request]

# Read-only token: the jobs only need to fetch code (actions/checkout).
permissions:
  contents: read

# A newer run in the same group (workflow + PR head branch) cancels the older
# one; when head_ref is empty the run id makes the group unique.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
jobs:
  # Cross-build a static OpenBLAS with the apt mips64 cross toolchains, build a
  # pinned qemu (mips64el user-mode only) from source, and run the
  # utest/ctest/test binaries under qemu-mips64el.
  TEST:
    if: "github.repository == 'OpenMathLib/OpenBLAS'"
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        # target: label used in the ccache key
        # triple: prefix of the cross compiler packages and binaries
        # opts:   make variables selecting the OpenBLAS target
        include:
          - target: MIPS64_GENERIC
            triple: mips64el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=MIPS64_GENERIC
          - target: SICORTEX
            triple: mips64el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=SICORTEX
          - target: I6400
            triple: mipsisa64r6el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=I6400
          - target: P6600
            triple: mipsisa64r6el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=P6600
          - target: I6500
            triple: mipsisa64r6el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=I6500
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # -y keeps the install non-interactive regardless of the image's apt
      # configuration (same as the cross_build job).
      - name: install build deps
        run: |
          sudo apt-get update
          sudo apt-get install -y autoconf automake autotools-dev ninja-build make ccache \
          gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-mips64el-cross

      # qemu is pinned to a specific commit.
      - name: checkout qemu
        uses: actions/checkout@v3
        with:
          repository: qemu/qemu
          path: qemu
          ref: 79dfa177ae348bb5ab5f97c0915359b13d6186e2

      - name: build qemu
        run: |
          cd qemu
          ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=mips64el-linux-user --disable-system
          make -j$(nproc)
          make install

      - name: Compilation cache
        uses: actions/cache@v3
        with:
          path: ~/.ccache
          key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
          restore-keys: |
            ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
            ccache-${{ runner.os }}-${{ matrix.target }}

      - name: Configure ccache
        run: |
          test -d ~/.ccache || mkdir -p ~/.ccache
          echo "max_size = 300M" > ~/.ccache/ccache.conf
          echo "compression = true" >> ~/.ccache/ccache.conf
          ccache -s

      - name: build OpenBLAS
        run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)

      # The loops run exactly the commands that used to be listed one by one,
      # in the same order: precisions s, d, c, z within each group.
      - name: test
        run: |
          export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
          QEMU=qemu-mips64el
          $QEMU ./utest/openblas_utest
          $QEMU ./utest/openblas_utest_ext
          # CBLAS tests: level 1 reads no input, levels 2 and 3 read ./ctest/<p>in<level>
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=2 $QEMU ./ctest/x${p}cblat1
          done
          for lvl in 2 3; do
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=2 $QEMU ./ctest/x${p}cblat${lvl} < ./ctest/${p}in${lvl}
            done
          done
          # BLAS level 1 tests: one thread, then two threads
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 $QEMU ./test/${p}blat1
          done
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=2 $QEMU ./test/${p}blat1
          done
          # BLAS level 2 and 3 tests: remove the old summary files before each pass
          for lvl in 2 3; do
            rm -f ./test/?BLAT${lvl}.SUMM
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 $QEMU ./test/${p}blat${lvl} < ./test/${p}blat${lvl}.dat
            done
            rm -f ./test/?BLAT${lvl}.SUMM
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=2 $QEMU ./test/${p}blat${lvl} < ./test/${p}blat${lvl}.dat
            done
          done

View File

@ -1,90 +0,0 @@
# Only the "head" branch of the OpenBLAS package is tested.
name: Nightly-Homebrew-Build

# push / pull_request runs still build and test the `develop` branch, so they
# are triggered only when this workflow file itself changes.
on:
  schedule:
    # 7:45 AM UTC every day (late at night in the USA)
    - cron: '45 7 * * *'
  push:
    paths:
      - '**/nightly-Homebrew-build.yml'
  pull_request:
    branches:
      - develop
    paths:
      - '**/nightly-Homebrew-build.yml'

# Read-only token: the job only needs to fetch code (actions/checkout).
permissions:
  contents: read

# A newer run in the same group (workflow + PR head branch) cancels the older
# one; when head_ref is empty the run id makes the group unique.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
jobs:
  # Build, bottle, test and audit the Homebrew `openblas` formula at --HEAD,
  # then upload the bottle as a workflow artifact.
  build-OpenBLAS-with-Homebrew:
    if: "github.repository == 'OpenMathLib/OpenBLAS'"
    runs-on: macos-latest
    env:
      # NOTE(review): Xcode 11.4.1 and the "catalina" artifact name below look
      # stale for `macos-latest` - confirm this Xcode still exists on the runner.
      DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer
      HOMEBREW_DEVELOPER: "ON"
      HOMEBREW_DISPLAY_INSTALL_TIMES: "ON"
      HOMEBREW_NO_ANALYTICS: "ON"
      HOMEBREW_NO_AUTO_UPDATE: "ON"
      HOMEBREW_NO_BOTTLE_SOURCE_FALLBACK: "ON"
      HOMEBREW_NO_INSTALL_CLEANUP: "ON"
      HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: "ON"
      HOMEBREW_NO_INSTALL_FROM_API: "ON"
    steps:
      # Scheduled runs sleep for a random 0-599 seconds first.
      - name: Random delay for cron job
        if: github.event_name == 'schedule'
        run: |
          delay=$(( RANDOM % 600 ))
          printf 'Delaying for %s seconds on event %s' ${delay} "${{ github.event_name }}"
          sleep ${delay}

      # Technically not even needed: Homebrew fetches `develop` via git itself.
      # Uses v3 like the other workflows in this repository.
      - uses: actions/checkout@v3

      - name: Update Homebrew
        if: github.event_name != 'pull_request'
        run: brew update || true

      - name: Install prerequisites
        run: brew install --fetch-HEAD --HEAD --only-dependencies --keep-tmp openblas

      # The HEAD flags tell Homebrew to build the develop branch, fetched via git.
      - name: Install and bottle OpenBLAS
        run: brew install --fetch-HEAD --HEAD --build-bottle --keep-tmp openblas

      - name: Create bottle
        run: |
          brew bottle -v openblas
          mkdir bottles
          mv *.bottle.tar.gz bottles

      # v3 of the artifact actions has been retired by GitHub; v4 takes the
      # same `name` and `path` inputs.
      - name: Upload bottle
        uses: actions/upload-artifact@v4
        with:
          name: openblas--HEAD.catalina.bottle.tar.gz
          path: bottles

      - name: Show linkage
        run: brew linkage -v openblas

      - name: Test openblas
        run: brew test --HEAD --verbose openblas

      - name: Audit openblas formula
        run: |
          brew audit --strict openblas
          brew cat openblas

      - name: Post logs on failure
        if: failure()
        run: brew gist-logs --with-hostname -v openblas

View File

@ -1,256 +0,0 @@
name: riscv64 zvl256b qemu test
on: [push, pull_request]

# Read-only token: the jobs only need to fetch code (actions/checkout).
permissions:
  contents: read

# A newer run in the same group (workflow + PR head branch) cancels the older
# one; when head_ref is empty the run id makes the group unique.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
jobs:
  # RISC-V vector builds of OpenBLAS, tested under qemu-riscv64 with a CPU
  # model chosen per matrix entry.
  TEST:
    runs-on: ubuntu-latest
    if: github.repository == 'OpenMathLib/OpenBLAS'
    env:
      # Job-wide variables read by the install and build steps.
      triple: riscv64-unknown-linux-gnu
      riscv_gnu_toolchain: https://github.com/riscv-collab/riscv-gnu-toolchain
      riscv_gnu_toolchain_version: '13.2.0'
      riscv_gnu_toolchain_nightly_download_path: /releases/download/2024.02.02/riscv64-glibc-ubuntu-22.04-llvm-nightly-2024.02.02-nightly.tar.gz
    strategy:
      fail-fast: false
      matrix:
        # target:   passed to make as TARGET= and -DTARGET=, and used in the ccache key
        # opts:     further make variables
        # qemu_cpu: exported as QEMU_CPU by the test steps
        include:
          - target: RISCV64_ZVL128B
            opts: TARGET=RISCV64_ZVL128B BINARY=64 ARCH=riscv64
            qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=128,elen=64
          - target: RISCV64_ZVL256B
            opts: TARGET=RISCV64_ZVL256B BINARY=64 ARCH=riscv64
            qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64
          - target: DYNAMIC_ARCH=1
            opts: TARGET=RISCV64_GENERIC BINARY=64 ARCH=riscv64 DYNAMIC_ARCH=1
            qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64
steps:
  - name: Checkout repository
    uses: actions/checkout@v3

  # Installs host build tools from apt, then downloads the nightly toolchain
  # tarball named by the job-level env variables and unpacks it under /opt.
  - name: install build deps
    run: |
      sudo apt-get update
      sudo apt-get install autoconf automake autotools-dev ninja-build make \
      libgomp1-riscv64-cross ccache
      wget ${riscv_gnu_toolchain}/${riscv_gnu_toolchain_nightly_download_path}
      tar -xvf $(basename ${riscv_gnu_toolchain_nightly_download_path}) -C /opt

  # The key ends in the commit SHA; the restore-keys fall back to the most
  # recent cache for the same ref, then for the same target.
  - name: Compilation cache
    uses: actions/cache@v3
    with:
      key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
      restore-keys: |
        ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
        ccache-${{ runner.os }}-${{ matrix.target }}
      path: ~/.ccache

  # Caps the cache at 300M with compression enabled, then prints statistics.
  - name: Configure ccache
    run: |
      test -d ~/.ccache || mkdir -p ~/.ccache
      echo "max_size = 300M" > ~/.ccache/ccache.conf
      echo "compression = true" >> ~/.ccache/ccache.conf
      ccache -s
# NOTE(review): in all three steps `${triple}` and `${riscv_gnu_toolchain_version}`
# sit inside single quotes, so the shell passes them through literally;
# presumably make expands them from the job-level env - confirm.

# Library build: clang as the C compiler, the GNU cross tools for the rest.
- name: build OpenBLAS libs
  run: |
    export PATH="/opt/riscv/bin:$PATH"
    make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
      CC='ccache clang --rtlib=compiler-rt -target ${triple} --sysroot /opt/riscv/sysroot --gcc-toolchain=/opt/riscv/lib/gcc/riscv64-unknown-linux-gnu/${riscv_gnu_toolchain_version}/' \
      AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
      RANLIB='ccache ${triple}-ranlib' \
      FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
      HOSTCC=gcc HOSTFC=gfortran -j$(nproc)

# Test programs are built with the cross gcc (goal `tests`).
- name: build OpenBLAS tests
  run: |
    export PATH="/opt/riscv/bin:$PATH"
    make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
      CC='${triple}-gcc' \
      AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
      RANLIB='ccache ${triple}-ranlib' \
      FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
      HOSTCC=gcc HOSTFC=gfortran -j$(nproc) tests

# Builds only the listed LAPACK test drivers. The last goal no longer ends in
# a dangling line continuation, so a line added after it cannot be silently
# absorbed into this make command.
- name: build lapack-netlib tests
  working-directory: ./lapack-netlib/TESTING
  run: |
    export PATH="/opt/riscv/bin:$PATH"
    make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
      CC='${triple}-gcc' \
      AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
      RANLIB='ccache ${triple}-ranlib' \
      FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
      HOSTCC=gcc HOSTFC=gfortran -j$(nproc) \
      LIN/xlintsts LIN/xlintstc LIN/xlintstd LIN/xlintstz LIN/xlintstrfs \
      LIN/xlintstrfc LIN/xlintstrfd LIN/xlintstrfz LIN/xlintstds \
      LIN/xlintstzc EIG/xeigtsts EIG/xeigtstc EIG/xeigtstd EIG/xeigtstz
# Runs the BLAS (test/) and CBLAS (ctest/) test programs in parallel under
# qemu-riscv64, one log file per program in ./test_out, and fails the step if
# any log contains "FAIL".
- name: OpenBLAS tests
  shell: bash
  run: |
    export PATH="/opt/riscv/bin:$PATH"
    export QEMU_CPU=${{ matrix.qemu_cpu }}
    rm -rf ./test_out
    mkdir -p ./test_out
    # run_test DIR CMD [DATA]
    #   Runs ./DIR/CMD under qemu (stdin from ./DIR/DATA when given) and writes
    #   a header line plus the combined stdout/stderr to ./test_out/DIR.CMD.
    #   A nonzero qemu exit code is recorded as a "*** FAIL" line in that log.
    run_test() {
      local DIR=$1 CMD=$2 DATA=$3
      local OUTPUT="./test_out/$DIR.$CMD"
      local RV=0
      echo "`pwd`/$DIR/$CMD $DIR/$DATA" > $OUTPUT
      # tee -a: append, so the header written above is not truncated.
      # `|| RV=...`: with `shell: bash` the script runs under -e and pipefail;
      # catching the failure here keeps the background subshell alive long
      # enough to record it, and PIPESTATUS[0] is qemu's status, not tee's.
      if [[ -z $DATA ]]; then
        qemu-riscv64 ./$DIR/$CMD |& tee -a $OUTPUT || RV=${PIPESTATUS[0]}
      else
        qemu-riscv64 ./$DIR/$CMD < ./$DIR/$DATA |& tee -a $OUTPUT || RV=${PIPESTATUS[0]}
      fi
      if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi
    }
    # Same programs and launch order as the former one-per-line list.
    for p in c d s z; do
      run_test test ${p}blat1 &
      run_test test ${p}blat2 ${p}blat2.dat &
      run_test test ${p}blat3 ${p}blat3.dat &
    done
    for p in c d s z; do
      run_test ctest x${p}cblat1 &
      run_test ctest x${p}cblat2 ${p}in2 &
      run_test ctest x${p}cblat3 ${p}in3 &
    done
    wait
    # Print every log that mentions FAIL and remember that something failed.
    FAILURES=
    while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*)
    if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi
# LAPACK (netlib) test drivers under qemu-riscv64. Currently disabled: the
# script exits 0 right after printing a notice, so nothing below `exit 0` runs.
- name: netlib tests
  shell: bash
  run: |
    : # these take a very long time
    echo "Skipping netlib tests in CI"
    exit 0
    : # comment out exit above to enable the tests
    : # probably we want to identify a subset to run in CI
    export PATH="/opt/riscv/bin:$PATH"
    export QEMU_CPU=${{ matrix.qemu_cpu }}
    rm -rf ./test_out
    mkdir -p ./test_out
    # run_test OUT IN DRIVER DESCRIPTION
    #   Runs ./lapack-netlib/TESTING/DRIVER under qemu with stdin from
    #   ./lapack-netlib/TESTING/IN and logs to ./test_out/OUT, preceded by the
    #   description and the command. Appends "*** FAIL" lines for a nonzero
    #   exit code, for any "fail" in the log, and for "rror" lines that mention
    #   neither "passed" nor "largest error".
    run_test() {
      local OUTPUT="./test_out/$1"
      local DATA="./lapack-netlib/TESTING/$2"
      local CMD="./lapack-netlib/TESTING/$3"
      local RV=0
      echo "$4" > $OUTPUT
      echo "$CMD" >> $OUTPUT
      # tee -a keeps the two header lines; `|| RV=...` records qemu's own exit
      # status without letting -e/pipefail end the background subshell first.
      qemu-riscv64 $CMD < $DATA |& tee -a $OUTPUT || RV=${PIPESTATUS[0]}
      if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi
      if grep -q fail $OUTPUT ; then echo "*** FAIL: log contains 'fail'" >> $OUTPUT ; fi
      # Each grep must pass its matches downstream (no -q inside the pipe);
      # the final output is discarded and only the pipeline status is used.
      if grep rror $OUTPUT | grep -v "passed" | grep -v "largest error" > /dev/null ; then echo "*** FAIL: log contains 'error'" >> $OUTPUT ; fi
    }
run_test stest.out stest.in LIN/xlintsts "Testing REAL LAPACK linear equation routines" &
run_test ctest.out ctest.in LIN/xlintstc "Testing COMPLEX LAPACK linear equation routines" &
run_test dtest.out dtest.in LIN/xlintstd "Testing DOUBLE PRECISION LAPACK linear equation routines" &
run_test ztest.out ztest.in LIN/xlintstz "Testing COMPLEX16 LAPACK linear equation routines" &
run_test dstest.out dstest.in LIN/xlintstds "Testing SINGLE-DOUBLE PRECISION LAPACK prototype linear equation routines" &
run_test zctest.out zctest.in LIN/xlintstzc "Testing COMPLEX-COMPLEX16 LAPACK prototype linear equation routines" &
run_test stest_rfp.out stest_rfp.in LIN/xlintstrfs "Testing REAL LAPACK RFP prototype linear equation routines" &
run_test dtest_rfp.out dtest_rfp.in LIN/xlintstrfd "Testing DOUBLE PRECISION LAPACK RFP prototype linear equation routines" &
run_test ctest_rfp.out ctest_rfp.in LIN/xlintstrfc "Testing COMPLEX LAPACK RFP prototype linear equation routines" &
run_test ztest_rfp.out ztest_rfp.in LIN/xlintstrfz "Testing COMPLEX16 LAPACK RFP prototype linear equation routines" &
run_test snep.out nep.in EIG/xeigtsts "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
run_test ssep.out sep.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test sse2.out se2.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test ssvd.out svd.in EIG/xeigtsts "SVD - Testing Singular Value Decomposition routines" &
run_test sec.out sec.in EIG/xeigtsts "SEC - Testing REAL Eigen Condition Routines" &
run_test sed.out sed.in EIG/xeigtsts "SEV - Testing REAL Nonsymmetric Eigenvalue Driver" &
run_test sgg.out sgg.in EIG/xeigtsts "SGG - Testing REAL Nonsymmetric Generalized Eigenvalue Problem routines" &
run_test sgd.out sgd.in EIG/xeigtsts "SGD - Testing REAL Nonsymmetric Generalized Eigenvalue Problem driver routines" &
run_test ssb.out ssb.in EIG/xeigtsts "SSB - Testing REAL Symmetric Eigenvalue Problem routines" &
run_test ssg.out ssg.in EIG/xeigtsts "SSG - Testing REAL Symmetric Generalized Eigenvalue Problem routines" &
run_test sbal.out sbal.in EIG/xeigtsts "SGEBAL - Testing the balancing of a REAL general matrix" &
run_test sbak.out sbak.in EIG/xeigtsts "SGEBAK - Testing the back transformation of a REAL balanced matrix" &
run_test sgbal.out sgbal.in EIG/xeigtsts "SGGBAL - Testing the balancing of a pair of REAL general matrices" &
run_test sgbak.out sgbak.in EIG/xeigtsts "SGGBAK - Testing the back transformation of a pair of REAL balanced matrices" &
run_test sbb.out sbb.in EIG/xeigtsts "SBB - Testing banded Singular Value Decomposition routines" &
run_test sglm.out glm.in EIG/xeigtsts "GLM - Testing Generalized Linear Regression Model routines" &
run_test sgqr.out gqr.in EIG/xeigtsts "GQR - Testing Generalized QR and RQ factorization routines" &
run_test sgsv.out gsv.in EIG/xeigtsts "GSV - Testing Generalized Singular Value Decomposition routines" &
run_test scsd.out csd.in EIG/xeigtsts "CSD - Testing CS Decomposition routines" &
run_test slse.out lse.in EIG/xeigtsts "LSE - Testing Constrained Linear Least Squares routines" &
run_test cnep.out nep.in EIG/xeigtstc "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
run_test csep.out sep.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test cse2.out se2.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test csvd.out svd.in EIG/xeigtstc "SVD - Testing Singular Value Decomposition routines" &
run_test cec.out cec.in EIG/xeigtstc "CEC - Testing COMPLEX Eigen Condition Routines" &
run_test ced.out ced.in EIG/xeigtstc "CES - Testing COMPLEX Nonsymmetric Schur Form Driver" &
run_test cgg.out cgg.in EIG/xeigtstc "CGG - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem routines" &
run_test cgd.out cgd.in EIG/xeigtstc "CGD - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem driver routines" &
run_test csb.out csb.in EIG/xeigtstc "CHB - Testing Hermitian Eigenvalue Problem routines" &
run_test csg.out csg.in EIG/xeigtstc "CSG - Testing Symmetric Generalized Eigenvalue Problem routines" &
run_test cbal.out cbal.in EIG/xeigtstc "CGEBAL - Testing the balancing of a COMPLEX general matrix" &
run_test cbak.out cbak.in EIG/xeigtstc "CGEBAK - Testing the back transformation of a COMPLEX balanced matrix" &
run_test cgbal.out cgbal.in EIG/xeigtstc "CGGBAL - Testing the balancing of a pair of COMPLEX general matrices" &
run_test cgbak.out cgbak.in EIG/xeigtstc "CGGBAK - Testing the back transformation of a pair of COMPLEX balanced matrices" &
run_test cbb.out cbb.in EIG/xeigtstc "CBB - Testing banded Singular Value Decomposition routines" &
run_test cglm.out glm.in EIG/xeigtstc "GLM - Testing Generalized Linear Regression Model routines" &
run_test cgqr.out gqr.in EIG/xeigtstc "GQR - Testing Generalized QR and RQ factorization routines" &
run_test cgsv.out gsv.in EIG/xeigtstc "GSV - Testing Generalized Singular Value Decomposition routines" &
run_test ccsd.out csd.in EIG/xeigtstc "CSD - Testing CS Decomposition routines" &
run_test clse.out lse.in EIG/xeigtstc "LSE - Testing Constrained Linear Least Squares routines" &
run_test dnep.out nep.in EIG/xeigtstd "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
run_test dsep.out sep.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test dse2.out se2.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test dsvd.out svd.in EIG/xeigtstd "SVD - Testing Singular Value Decomposition routines" &
run_test dec.out dec.in EIG/xeigtstd "DEC - Testing DOUBLE PRECISION Eigen Condition Routines" &
run_test ded.out ded.in EIG/xeigtstd "DEV - Testing DOUBLE PRECISION Nonsymmetric Eigenvalue Driver" &
run_test dgg.out dgg.in EIG/xeigtstd "DGG - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem routines" &
run_test dgd.out dgd.in EIG/xeigtstd "DGD - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem driver routines" &
run_test dsb.out dsb.in EIG/xeigtstd "DSB - Testing DOUBLE PRECISION Symmetric Eigenvalue Problem routines" &
run_test dsg.out dsg.in EIG/xeigtstd "DSG - Testing DOUBLE PRECISION Symmetric Generalized Eigenvalue Problem routines" &
run_test dbal.out dbal.in EIG/xeigtstd "DGEBAL - Testing the balancing of a DOUBLE PRECISION general matrix" &
run_test dbak.out dbak.in EIG/xeigtstd "DGEBAK - Testing the back transformation of a DOUBLE PRECISION balanced matrix" &
run_test dgbal.out dgbal.in EIG/xeigtstd "DGGBAL - Testing the balancing of a pair of DOUBLE PRECISION general matrices" &
run_test dgbak.out dgbak.in EIG/xeigtstd "DGGBAK - Testing the back transformation of a pair of DOUBLE PRECISION balanced matrices" &
run_test dbb.out dbb.in EIG/xeigtstd "DBB - Testing banded Singular Value Decomposition routines" &
run_test dglm.out glm.in EIG/xeigtstd "GLM - Testing Generalized Linear Regression Model routines" &
run_test dgqr.out gqr.in EIG/xeigtstd "GQR - Testing Generalized QR and RQ factorization routines" &
run_test dgsv.out gsv.in EIG/xeigtstd "GSV - Testing Generalized Singular Value Decomposition routines" &
run_test dcsd.out csd.in EIG/xeigtstd "CSD - Testing CS Decomposition routines" &
run_test dlse.out lse.in EIG/xeigtstd "LSE - Testing Constrained Linear Least Squares routines" &
run_test znep.out nep.in EIG/xeigtstz "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
run_test zsep.out sep.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test zse2.out se2.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test zsvd.out svd.in EIG/xeigtstz "SVD - Testing Singular Value Decomposition routines" &
run_test zec.out zec.in EIG/xeigtstz "ZEC - Testing COMPLEX16 Eigen Condition Routines" &
run_test zed.out zed.in EIG/xeigtstz "ZES - Testing COMPLEX16 Nonsymmetric Schur Form Driver" &
run_test zgg.out zgg.in EIG/xeigtstz "ZGG - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem routines" &
run_test zgd.out zgd.in EIG/xeigtstz "ZGD - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem driver routines" &
run_test zsb.out zsb.in EIG/xeigtstz "ZHB - Testing Hermitian Eigenvalue Problem routines" &
run_test zsg.out zsg.in EIG/xeigtstz "ZSG - Testing Symmetric Generalized Eigenvalue Problem routines" &
run_test zbal.out zbal.in EIG/xeigtstz "ZGEBAL - Testing the balancing of a COMPLEX16 general matrix" &
run_test zbak.out zbak.in EIG/xeigtstz "ZGEBAK - Testing the back transformation of a COMPLEX16 balanced matrix" &
run_test zgbal.out zgbal.in EIG/xeigtstz "ZGGBAL - Testing the balancing of a pair of COMPLEX general matrices" &
run_test zgbak.out zgbak.in EIG/xeigtstz "ZGGBAK - Testing the back transformation of a pair of COMPLEX16 balanced matrices" &
run_test zbb.out zbb.in EIG/xeigtstz "ZBB - Testing banded Singular Value Decomposition routines" &
run_test zglm.out glm.in EIG/xeigtstz "GLM - Testing Generalized Linear Regression Model routines" &
run_test zgqr.out gqr.in EIG/xeigtstz "GQR - Testing Generalized QR and RQ factorization routines" &
run_test zgsv.out gsv.in EIG/xeigtstz "GSV - Testing Generalized Singular Value Decomposition routines" &
run_test zcsd.out csd.in EIG/xeigtstz "CSD - Testing CS Decomposition routines" &
run_test zlse.out lse.in EIG/xeigtstz "LSE - Testing Constrained Linear Least Squares routines" &
# Wait for all background test runs to finish.
wait
# Print every log that mentions FAIL and remember that something failed.
while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*)
# Summarise the logs with the netlib helper script.
python ./lapack-netlib/lapack_testing.py -d ./test_out -e > netlib_summary
TOTALS="$(grep 'ALL PRECISIONS' netlib_summary)"
# Defaults of -1 make the check below fail if the summary line is not found.
NUMERICAL_ERRORS=-1
OTHER_ERRORS=-1
# Source the two counters from fields 5 and 7 of the "ALL PRECISIONS" line
# (the process substitution was previously missing its closing parenthesis).
. <(awk '/ALL PRECISIONS/{printf "NUMERICAL_ERRORS=%s\nOTHER_ERRORS=%s\n", $5, $7}' netlib_summary)
if (( NUMERICAL_ERRORS != 0 )) || (( OTHER_ERRORS != 0 )) ; then cat netlib_summary ; FAILURES=1 ; fi
if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi

21
.gitignore vendored
View File

@ -14,7 +14,6 @@ lapack-3.4.2
lapack-3.4.2.tgz
lapack-netlib/make.inc
lapack-netlib/lapacke/include/lapacke_mangling.h
lapack-netlib/SRC/la_constants.mod
lapack-netlib/TESTING/testing_results.txt
lapack-netlib/INSTALL/test*
lapack-netlib/TESTING/xeigtstc
@ -47,66 +46,46 @@ config_last.h
getarch
getarch_2nd
utest/openblas_utest
utest/openblas_utest_ext
ctest/xccblat1
ctest/xccblat2
ctest/xccblat3
ctest/xccblat3_3m
ctest/xdcblat1
ctest/xdcblat2
ctest/xdcblat3
ctest/xdcblat3_3m
ctest/xscblat1
ctest/xscblat2
ctest/xscblat3
ctest/xscblat3_3m
ctest/xzcblat1
ctest/xzcblat2
ctest/xzcblat3
ctest/xzcblat3_3m
exports/linktest.c
exports/linux.def
kernel/setparam_*.c
kernel/kernel_*.h
test/CBLAT2.SUMM
test/CBLAT3.SUMM
test/CBLAT3_3M.SUMM
test/DBLAT2.SUMM
test/DBLAT3.SUMM
test/DBLAT3_3M.SUMM
test/SBLAT2.SUMM
test/SBLAT3.SUMM
test/SBLAT3_3M.SUMM
test/ZBLAT2.SUMM
test/ZBLAT3.SUMM
test/ZBLAT3_3M.SUMM
test/SHBLAT3.SUMM
test/SBBLAT3.SUMM
test/cblat1
test/cblat2
test/cblat3
test/cblat3_3m
test/dblat1
test/dblat2
test/dblat3
test/dblat3_3m
test/sblat1
test/sblat2
test/sblat3
test/sblat3_3m
test/test_shgemm
test/test_sbgemm
test/zblat1
test/zblat2
test/zblat3
test/zblat3_3m
build
build.*
*.swp
benchmark/*.goto
benchmark/smallscaling
.vscode
CMakeCache.txt
CMakeFiles/*
.vscode
**/__pycache__

View File

@ -1,175 +1,113 @@
# XXX: Precise is already deprecated, new default is Trusty.
# https://blog.travis-ci.com/2017-07-11-trusty-as-default-linux-is-coming
dist: focal
dist: precise
sudo: true
language: c
matrix:
include:
- &test-ubuntu
# os: linux
os: linux
compiler: gcc
addons:
apt:
packages:
- gfortran
# before_script: &common-before
# - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
# script:
# - make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
# - make -C test $COMMON_FLAGS $BTYPE
# - make -C ctest $COMMON_FLAGS $BTYPE
# - make -C utest $COMMON_FLAGS $BTYPE
# env:
# - TARGET_BOX=LINUX64
# - BTYPE="BINARY=64"
#
# - <<: *test-ubuntu
os: linux
arch: ppc64le
before_script: &common-before
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
script:
- travis_wait 50 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
- set -e
- make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE
env:
- TARGET_BOX=LINUX64
- BTYPE="BINARY=64"
- <<: *test-ubuntu
os: linux-ppc64le
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
env:
# for matrix annotation only
- TARGET_BOX=PPC64LE_LINUX
- BTYPE="BINARY=64 USE_OPENMP=1"
- <<: *test-ubuntu
os: linux
arch: s390x
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
- sudo apt-get install --only-upgrade binutils
env:
# for matrix annotation only
- TARGET_BOX=IBMZ_LINUX
- TARGET_BOX=LINUX64
- BTYPE="BINARY=64 USE_OPENMP=1"
- <<: *test-ubuntu
os: linux
dist: focal
arch: s390x
env:
- TARGET_BOX=LINUX64
- BTYPE="BINARY=64 INTERFACE64=1"
- <<: *test-ubuntu
compiler: clang
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
- sudo apt-get install --only-upgrade binutils
env:
# for matrix annotation only
- TARGET_BOX=IBMZ_LINUX
- BTYPE="BINARY=64 USE_OPENMP=0 CC=clang"
- TARGET_BOX=LINUX64
- BTYPE="BINARY=64 CC=clang"
# - <<: *test-ubuntu
# env:
# - TARGET_BOX=LINUX64
# - BTYPE="BINARY=64 USE_OPENMP=1"
#
# - <<: *test-ubuntu
# env:
# - TARGET_BOX=LINUX64
# - BTYPE="BINARY=64 INTERFACE64=1"
#
# - <<: *test-ubuntu
# compiler: clang
# env:
# - TARGET_BOX=LINUX64
# - BTYPE="BINARY=64 CC=clang"
#
# - <<: *test-ubuntu
# compiler: clang
# env:
# - TARGET_BOX=LINUX64
# - BTYPE="BINARY=64 INTERFACE64=1 CC=clang"
#
# - <<: *test-ubuntu
# addons:
# apt:
# packages:
# - gcc-multilib
# - gfortran-multilib
# env:
# - TARGET_BOX=LINUX32
# - BTYPE="BINARY=32"
#
- os: linux
arch: ppc64le
dist: bionic
compiler: gcc
before_script:
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
- sudo apt-get update
- sudo apt-get install gcc-9 gfortran-9 -y
script:
- travis_wait 50 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE
- <<: *test-ubuntu
compiler: clang
env:
# for matrix annotation only
- TARGET_BOX=PPC64LE_LINUX_P9
- TARGET_BOX=LINUX64
- BTYPE="BINARY=64 INTERFACE64=1 CC=clang"
- <<: *test-ubuntu
addons:
apt:
packages:
- gcc-multilib
- gfortran-multilib
env:
- TARGET_BOX=LINUX32
- BTYPE="BINARY=32"
- os: linux
arch: ppc64le
dist: bionic
compiler: gcc
before_script:
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
- sudo apt-get update
- sudo apt-get install gcc-9 gfortran-9 -y
addons:
apt:
packages:
- binutils-mingw-w64-x86-64
- gcc-mingw-w64-x86-64
- gfortran-mingw-w64-x86-64
before_script: *common-before
script:
- travis_wait 50 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
env:
# for matrix annotation only
- TARGET_BOX=PPC64LE_LINUX_P9
- TARGET_BOX=WIN64
- BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
# - os: linux
# compiler: gcc
# addons:
# apt:
# packages:
# - binutils-mingw-w64-x86-64
# - gcc-mingw-w64-x86-64
# - gfortran-mingw-w64-x86-64
# before_script: *common-before
# script:
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
# env:
# - TARGET_BOX=WIN64
# - BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
#
# Build & test on Alpine Linux inside chroot, i.e. on system with musl libc.
# These jobs need sudo, so Travis runs them on VM-based infrastructure
# which is slower than container-based infrastructure used for jobs
# that don't require sudo.
# - &test-alpine
# os: linux
# dist: trusty
# sudo: true
# language: minimal
# before_install:
# - "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.9.0/alpine-chroot-install' \
# && echo 'e5dfbbdc0c4b3363b99334510976c86bfa6cb251 alpine-chroot-install' | sha1sum -c || exit 1"
# - alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
# install:
# - sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
# before_script: *common-before
# script:
# # XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
# - alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
# CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
# - alpine make -C test $COMMON_FLAGS $BTYPE
# - alpine make -C ctest $COMMON_FLAGS $BTYPE
# - alpine make -C utest $COMMON_FLAGS $BTYPE
# env:
# - TARGET_BOX=LINUX64_MUSL
# - BTYPE="BINARY=64"
- &test-alpine
os: linux
dist: trusty
sudo: true
language: minimal
before_install:
- "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.9.0/alpine-chroot-install' \
&& echo 'e5dfbbdc0c4b3363b99334510976c86bfa6cb251 alpine-chroot-install' | sha1sum -c || exit 1"
- alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
install:
- sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
before_script: *common-before
script:
- set -e
# XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
- alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
- alpine make -C test $COMMON_FLAGS $BTYPE
- alpine make -C ctest $COMMON_FLAGS $BTYPE
- alpine make -C utest $COMMON_FLAGS $BTYPE
env:
- TARGET_BOX=LINUX64_MUSL
- BTYPE="BINARY=64"
# XXX: This job segfaults in TESTS OF THE COMPLEX LEVEL 3 BLAS,
# but only on Travis CI, cannot reproduce it elsewhere.
@ -179,132 +117,69 @@ matrix:
# - TARGET_BOX=LINUX64_MUSL
# - BTYPE="BINARY=64 USE_OPENMP=1"
# - <<: *test-alpine
# env:
# - TARGET_BOX=LINUX64_MUSL
# - BTYPE="BINARY=64 INTERFACE64=1"
#
# # Build with the same flags as Alpine does in its OpenBLAS package.
# - <<: *test-alpine
# env:
# - TARGET_BOX=LINUX64_MUSL
# - BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=CORE2"
- <<: *test-alpine
env:
- TARGET_BOX=LINUX64_MUSL
- BTYPE="BINARY=64 INTERFACE64=1"
# - &test-cmake
# os: linux
# compiler: clang
# addons:
# apt:
# packages:
# - gfortran
# - cmake
# dist: trusty
# sudo: true
# before_script:
# - COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32"
# script:
# - mkdir build
# - CONFIG=Release
# - cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG
# - cmake --build build --config $CONFIG -- -j2
# env:
# - CMAKE=1
# - <<: *test-cmake
# env:
# - CMAKE=1 CMAKE_ARGS="-DNOFORTRAN=1"
# - <<: *test-cmake
# compiler: gcc
# env:
# - CMAKE=1
# Build with the same flags as Alpine does in its OpenBLAS package.
- <<: *test-alpine
env:
- TARGET_BOX=LINUX64_MUSL
- BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=CORE2"
# - &test-macos
# os: osx
# osx_image: xcode11.5
# before_script:
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
# script:
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
# env:
# - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-9"
#
# - <<: *test-macos
# osx_image: xcode12
# before_script:
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
# - brew update
# script:
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
# env:
# - BTYPE="TARGET=HASWELL USE_OPENMP=1 BINARY=64 INTERFACE64=1 CC=gcc-10 FC=gfortran-10"
#
# - <<: *test-macos
# osx_image: xcode12
# before_script:
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
# - brew update
# script:
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
# env:
# - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"
# - <<: *test-macos
# osx_image: xcode10
# env:
# - BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1"
# - <<: *test-macos
# osx_image: xcode11.5
# before_script:
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
# - brew update
# env:
# - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
# - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
# - CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
# - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch arm64 -miphoneos-version-min=10.0"
# - BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
# - <<: *test-macos
# osx_image: xcode11.5
# env:
## - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
## - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
# - CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
# - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch armv7 -miphoneos-version-min=5.1"
# - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
- &test-neoversen1
- &test-cmake
os: linux
arch: arm64
dist: focal
group: edge
virt: lxd
compiler: gcc
compiler: clang
addons:
apt:
packages:
- gfortran
- cmake
dist: trusty
sudo: true
before_script:
- COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32"
script:
- travis_wait 45 make && make lapack-test
- set -e
- mkdir build
- CONFIG=Release
- cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG
- cmake --build build --config $CONFIG -- -j2
env:
- TARGET_BOX=NEOVERSE_N1
- &test-neon1-gcc8
os: linux
arch: arm64
dist: focal
group: edge
virt: lxd
- CMAKE=1
- <<: *test-cmake
env:
- CMAKE=1 CMAKE_ARGS="-DNOFORTRAN=1"
- <<: *test-cmake
compiler: gcc
addons:
apt:
packages:
- gcc-8
- gfortran-8
script:
- travis_wait 45 make QUIET_MAKE=1 CC=gcc-8 FC=gfortran-8 DYNAMIC_ARCH=1
env:
- TARGET_BOX=NEOVERSE_N1-GCC8
- CMAKE=1
- &test-macos
os: osx
osx_image: xcode10.1
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
- brew update
- brew install gcc@8 # for gfortran
script:
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
env:
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-8"
- <<: *test-macos
osx_image: xcode10.0
env:
- BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1"
- <<: *test-macos
osx_image: xcode10.1
env:
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk"
- CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
- BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
# whitelist
branches:
only:

View File

@ -1,9 +1,5 @@
Thank you for the support.
### [2019.12/2021.9] [Chan-Zuckerberg Foundation EOSS Initiative](https://chanzuckerberg.com/eoss/)
Between December 2019 and September 2021, the development and maintenance of OpenBLAS were funded in part by the Chan-Zuckerberg Foundation in the context of two grants awarded to the NumPy Foundation and managed by NumFocus (Cycles 1 and 3 of the Essential Open Source Software for Science (EOSS) Initiative of the Chan-Zuckerberg Foundation).
### [2013.8] [Testbed for OpenBLAS project](https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project)
https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project/pledges

View File

@ -2,14 +2,11 @@
## Author: Hank Anderson <hank@statease.com>
##
cmake_minimum_required(VERSION 3.16.0)
cmake_minimum_required(VERSION 2.8.5)
project(OpenBLAS C ASM)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 28.dev)
set(OpenBLAS_PATCH_VERSION 9.dev)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
# Adhere to GNU filesystem layout conventions
@ -17,99 +14,59 @@ include(GNUInstallDirs)
include(CMakePackageConfigHelpers)
#######
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF)
option(BUILD_LAPACK_DEPRECATED "When building LAPACK, include also some older, deprecated routines" ON)
set(LAPACK_STRLEN "" CACHE STRING "When building LAPACK, use this type (e.g. \"int\") for character lengths (defaults to size_t)")
option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON)
option(BUILD_BENCHMARKS "Build the collection of BLAS/LAPACK benchmarks" OFF)
option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF)
if(MSVC)
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
endif()
option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS functions" OFF)
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64, aarch64, ppc or RISCV64-RVV1.0 only)" OFF)
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64, aarch64 or ppc only)" OFF)
option(DYNAMIC_OLDER "Include specific support for older x86 cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF)
option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)" OFF)
option(USE_LOCKING "Use locks even in single-threaded builds to make them callable from multiple threads" OFF)
option(USE_PERL "Use the older PERL scripts for build preparation instead of universal shell scripts" OFF)
option(NO_WARMUP "Do not run a benchmark on each startup just to find the best location for the memory buffer" ON)
option(FIXED_LIBNAME "Use a non-versioned name for the library and no symbolic linking to variant names" OFF)
set(LIBNAMEPREFIX "" CACHE STRING "Add a prefix to the openblas part of the library name" )
set(LIBNAMESUFFIX "" CACHE STRING "Add a suffix after the openblas part of the library name" )
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON)
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON)
else()
set(NO_AFFINITY 1)
endif()
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF)
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)
option(BUILD_STATIC_LIBS "Build static library" OFF)
if(NOT BUILD_STATIC_LIBS AND NOT BUILD_SHARED_LIBS)
set(BUILD_STATIC_LIBS ON CACHE BOOL "Build static library" FORCE)
endif()
if((BUILD_STATIC_LIBS AND BUILD_SHARED_LIBS) AND MSVC)
message(WARNING "Could not enable both BUILD_STATIC_LIBS and BUILD_SHARED_LIBS with MSVC, Disable BUILD_SHARED_LIBS")
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build static library" FORCE)
set(NO_AFFINITY 1)
endif()
# Add a prefix or suffix to all exported symbol names in the shared library.
# Avoids conflicts with other BLAS libraries, especially when using
# 64 bit integer interfaces in OpenBLAS.
set(SYMBOLPREFIX "" CACHE STRING "Add a prefix to all exported symbol names in the shared library to avoid conflicts with other BLAS libraries" )
set(SYMBOLSUFFIX "" CACHE STRING "Add a suffix to all exported symbol names in the shared library, e.g. _64 for INTERFACE64 builds" )
#######
if(BUILD_WITHOUT_LAPACK)
set(NO_LAPACK 1)
set(NO_LAPACKE 1)
set(NO_LAPACK 1)
set(NO_LAPACKE 1)
endif()
if(BUILD_WITHOUT_CBLAS)
set(NO_CBLAS 1)
set(NO_CBLAS 1)
endif()
#######
if(MSVC AND MSVC_STATIC_CRT)
set(CompilerFlags
CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_C_FLAGS
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
)
foreach(CompilerFlag ${CompilerFlags})
string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
endforeach()
set(CompilerFlags
CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_C_FLAGS
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
)
foreach(CompilerFlag ${CompilerFlags})
string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
endforeach()
endif()
message(WARNING "CMake support is experimental. It does not yet support all build options and may not produce the same Makefiles that OpenBLAS ships with.")
if (USE_OPENMP)
find_package(OpenMP REQUIRED)
endif ()
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")
set(OpenBLAS_LIBNAME ${LIBNAMEPREFIX}openblas${LIBNAMESUFFIX}${SUFFIX64_UNDERSCORE})
set(OpenBLAS_LIBNAME openblas${SUFFIX64_UNDERSCORE})
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
@ -129,13 +86,9 @@ if (NOT NO_LAPACK)
list(APPEND SUBDIRS lapack)
endif ()
if (NOT DEFINED BUILD_BFLOAT16)
set (BUILD_BFLOAT16 false)
endif ()
# set which float types we want to build for
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
# if none are defined, build for all
# set(BUILD_BFLOAT16 true)
set(BUILD_SINGLE true)
set(BUILD_DOUBLE true)
set(BUILD_COMPLEX true)
@ -167,11 +120,6 @@ if (BUILD_COMPLEX16)
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
endif ()
if (BUILD_BFLOAT16)
message(STATUS "Building Half Precision")
# list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing
endif ()
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
endif ()
@ -180,10 +128,9 @@ endif ()
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
if(MSVC)
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib/Debug)
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib/Release)
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib/Debug)
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib/Release)
endif ()
# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
set(TARGET_OBJS "")
foreach (SUBDIR ${SUBDIRS})
@ -196,7 +143,7 @@ endforeach ()
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
if (NOT NO_LAPACK)
if (NOT NOFORTRAN AND NOT NO_LAPACK)
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
if (NOT NO_LAPACKE)
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
@ -221,73 +168,12 @@ if (${DYNAMIC_ARCH})
endif ()
# add objects to the openblas lib
if(NOT NO_LAPACK)
add_library(LAPACK_OVERRIDES OBJECT ${LA_SOURCES})
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACK_OVERRIDES>")
endif()
if(NOT NO_LAPACKE)
add_library(LAPACKE OBJECT ${LAPACKE_SOURCES})
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACKE>")
endif()
#if(BUILD_RELAPACK)
# add_library(RELAPACK OBJECT ${RELA_SOURCES})
# list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:RELAPACK>")
#endif()
set(OpenBLAS_LIBS "")
if(BUILD_STATIC_LIBS)
add_library(${OpenBLAS_LIBNAME}_static STATIC ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
target_include_directories(${OpenBLAS_LIBNAME}_static INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
list(APPEND OpenBLAS_LIBS ${OpenBLAS_LIBNAME}_static)
endif()
if(BUILD_SHARED_LIBS)
add_library(${OpenBLAS_LIBNAME}_shared SHARED ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
target_include_directories(${OpenBLAS_LIBNAME}_shared INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
list(APPEND OpenBLAS_LIBS ${OpenBLAS_LIBNAME}_shared)
endif()
if(BUILD_STATIC_LIBS)
add_library(${OpenBLAS_LIBNAME} ALIAS ${OpenBLAS_LIBNAME}_static)
else()
add_library(${OpenBLAS_LIBNAME} ALIAS ${OpenBLAS_LIBNAME}_shared)
endif()
set_target_properties(${OpenBLAS_LIBS} PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${RELA_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
# Android needs to explicitly link against libm
if (${CMAKE_SYSTEM_NAME} MATCHES "AIX|Android|Linux|FreeBSD|OpenBSD|NetBSD|DragonFly|Darwin")
if(BUILD_STATIC_LIBS)
target_link_libraries(${OpenBLAS_LIBNAME}_static m)
endif()
if(BUILD_SHARED_LIBS)
target_link_libraries(${OpenBLAS_LIBNAME}_shared m)
endif()
endif()
if (USE_OPENMP)
if(BUILD_STATIC_LIBS)
target_link_libraries(${OpenBLAS_LIBNAME}_static OpenMP::OpenMP_C)
endif()
if(BUILD_SHARED_LIBS)
target_link_libraries(${OpenBLAS_LIBNAME}_shared OpenMP::OpenMP_C)
endif()
endif()
# It seems that this hack is no longer required since macOS 11 Big Sur
if (APPLE AND BUILD_SHARED_LIBS AND CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
set (CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1)
if (NOT NOFORTRAN)
set (CMAKE_Fortran_USE_RESPONSE_FILE_FOR_OBJECTS 1)
set (CMAKE_Fortran_CREATE_SHARED_LIBRARY
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru libopenblas.a && exit 0' "
"sh -c '${CMAKE_AR} -rs libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
"sh -c 'echo \"\" | ${CMAKE_Fortran_COMPILER} -o dummy.o -c -x f95-cpp-input - '"
"sh -c '${CMAKE_Fortran_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load dummy.o -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'"
"sh -c 'ls -l ${CMAKE_BINARY_DIR}/lib'")
else ()
set (CMAKE_C_CREATE_SHARED_LIBRARY
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru libopenblas.a && exit 0' "
"sh -c '${CMAKE_AR} -rs libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
"sh -c '${CMAKE_C_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'")
endif ()
if(ANDROID)
target_link_libraries(${OpenBLAS_LIBNAME} m)
endif()
# Handle MSVC exports
@ -296,21 +182,21 @@ if(MSVC AND BUILD_SHARED_LIBS)
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
else()
# Creates verbose .def file (51KB vs 18KB)
set_target_properties(${OpenBLAS_LIBNAME}_shared PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true)
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true)
endif()
endif()
# Set output for libopenblas
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES EXPORT_NAME "OpenBLAS")
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES EXPORT_NAME "OpenBLAS")
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
endforeach()
enable_testing()
@ -319,238 +205,112 @@ if (USE_THREAD)
# Add threading library to linker
find_package(Threads)
if (THREADS_HAVE_PTHREAD_ARG)
set_target_properties(${OpenBLAS_LIBS} PROPERTIES
COMPILE_OPTIONS "-pthread"
INTERFACE_COMPILE_OPTIONS "-pthread"
)
endif()
if(BUILD_STATIC_LIBS)
target_link_libraries(${OpenBLAS_LIBNAME}_static ${CMAKE_THREAD_LIBS_INIT})
endif()
if(BUILD_SHARED_LIBS)
target_link_libraries(${OpenBLAS_LIBNAME}_shared ${CMAKE_THREAD_LIBS_INIT})
set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY COMPILE_OPTIONS "-pthread")
set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread")
endif()
target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT})
endif()
#if (MSVC OR NOT NOFORTRAN)
if (NOT NO_CBLAS)
if (NOT ONLY_CBLAS)
# Broken without fortran on unix
add_subdirectory(utest)
endif()
add_subdirectory(utest)
endif()
if (NOT NOFORTRAN)
if (NOT ONLY_CBLAS)
if (NOT MSVC AND NOT NOFORTRAN)
# Build test and ctest
add_subdirectory(test)
endif()
if (BUILD_TESTING AND NOT BUILD_WITHOUT_LAPACK)
add_subdirectory(lapack-netlib/TESTING)
if(NOT NO_CBLAS)
add_subdirectory(ctest)
endif()
endif()
if(NOT NO_CBLAS)
if (NOT ONLY_CBLAS)
add_subdirectory(ctest)
endif()
endif()
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
add_subdirectory(cpp_thread_test)
endif()
if (NOT FIXED_LIBNAME)
set_target_properties(${OpenBLAS_LIBS} PROPERTIES
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
SOVERSION ${OpenBLAS_MAJOR_VERSION}
)
endif()
if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
if (NOT MSVC)
target_link_libraries(${OpenBLAS_LIBNAME}_shared "-Wl,-allow-multiple-definition")
target_link_libraries(${OpenBLAS_LIBNAME} "-Wl,-allow-multiple-definition")
else()
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /FORCE:MULTIPLE")
target_link_libraries(${OpenBLAS_LIBNAME} "/FORCE:MULTIPLE")
endif()
endif()
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
if (NOT DEFINED ARCH)
set(ARCH_IN "x86_64")
else()
set(ARCH_IN ${ARCH})
endif()
if (${CORE} STREQUAL "generic")
set(ARCH_IN "GENERIC")
endif ()
if (NOT DEFINED EXPRECISION)
set(EXPRECISION_IN 0)
else()
set(EXPRECISION_IN ${EXPRECISION})
endif()
if (NOT DEFINED NO_CBLAS)
set(NO_CBLAS_IN 0)
else()
set(NO_CBLAS_IN ${NO_CBLAS})
endif()
if (NOT DEFINED NO_LAPACK)
set(NO_LAPACK_IN 0)
else()
set(NO_LAPACK_IN ${NO_LAPACK})
endif()
if (NOT DEFINED NO_LAPACKE)
set(NO_LAPACKE_IN 0)
else()
set(NO_LAPACKE_IN ${NO_LAPACKE})
endif()
if (NOT DEFINED NEED2UNDERSCORES)
set(NEED2UNDERSCORES_IN 0)
else()
set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES})
endif()
if (NOT DEFINED ONLY_CBLAS)
set(ONLY_CBLAS_IN 0)
else()
set(ONLY_CBLAS_IN ${ONLY_CBLAS})
endif()
if (NOT DEFINED BU)
set(BU _)
endif()
if (NOT ${SYMBOLPREFIX} STREQUAL "")
message(STATUS "adding prefix ${SYMBOLPREFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
endif()
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
endif()
if (${BUILD_LAPACK_DEPRECATED})
set (BLD 1)
else ()
set (BLD 0)
endif()
if (${BUILD_BFLOAT16})
set (BBF16 1)
else ()
set (BBF16 0)
endif()
if (${BUILD_SINGLE})
set (BS 1)
else ()
set (BS 0)
endif()
if (${BUILD_DOUBLE})
set (BD 1)
else ()
set (BD 0)
endif()
if (${BUILD_COMPLEX})
set (BC 1)
else ()
set (BC 0)
endif()
if (${BUILD_COMPLEX16})
set (BZ 1)
else ()
set (BZ 0)
endif()
if (NOT USE_PERL)
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
COMMAND ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
COMMENT "renaming symbols"
)
else()
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
COMMENT "renaming symbols"
)
endif()
# Rename exported symbols only for shared builds that request a prefix and/or suffix.
# The variable is SYMBOLSUFFIX; the former spelling SYMBOLSUFIX always expanded to
# an empty string, so a suffix-only configuration never entered this block.
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
  # Fall back to x86_64 when no ARCH has been set.
  if (NOT DEFINED ARCH)
    set(ARCH_IN "x86_64")
  else()
    set(ARCH_IN ${ARCH})
  endif()
if (BUILD_BENCHMARKS)
#find_package(OpenMP REQUIRED)
file(GLOB SOURCES "benchmark/*.c")
if (NOT USE_OPENMP)
file(GLOB REMFILE "benchmark/smallscaling.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
endif()
if (BUILD_WITHOUT_LAPACK)
file(GLOB REMFILE "benchmark/cholesky.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/geev.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/gesv.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/getri.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/potrf.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/spmv.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/symv.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/linpack.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
endif()
if (NOT USE_GEMM3M)
file(GLOB REMFILE "benchmark/gemm3m.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
endif()
foreach(source ${SOURCES})
get_filename_component(name ${source} NAME_WE)
if ((NOT ${name} STREQUAL "zdot-intel") AND (NOT ${name} STREQUAL "cula_wrapper"))
set(defines DEFAULT COMPLEX DOUBLE "COMPLEX\;DOUBLE")
foreach(define ${defines})
set(target_name "benchmark_${name}")
if (NOT "${define}" STREQUAL "DEFAULT")
string(JOIN "_" define_str ${define})
set(target_name "${target_name}_${define_str}")
endif()
if ((NOT ${target_name} STREQUAL "benchmark_imax_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_imax_COMPLEX_DOUBLE") AND
(NOT ${target_name} STREQUAL "benchmark_imin_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_imin_COMPLEX_DOUBLE") AND
(NOT ${target_name} STREQUAL "benchmark_max_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_max_COMPLEX_DOUBLE") AND
(NOT ${target_name} STREQUAL "benchmark_min_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_min_COMPLEX_DOUBLE"))
add_executable(${target_name} ${source})
target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(${target_name} ${OpenBLAS_LIBNAME} )
# target_link_libraries(${target_name} ${OpenBLAS_LIBNAME} OpenMP::OpenMP_C)
if (NOT "${define}" STREQUAL "DEFAULT")
target_compile_definitions(${target_name} PRIVATE ${define})
endif()
endif()
endforeach()
endif()
endforeach()
if (${CORE} STREQUAL "generic")
set(ARCH_IN "GENERIC")
endif ()
if (NOT DEFINED EXPRECISION)
set(EXPRECISION_IN 0)
else()
set(EXPRECISION_IN ${EXPRECISION})
endif()
if (NOT DEFINED NO_CBLAS)
set(NO_CBLAS_IN 0)
else()
set(NO_CBLAS_IN ${NO_CBLAS})
endif()
if (NOT DEFINED NO_LAPACK)
set(NO_LAPACK_IN 0)
else()
set(NO_LAPACK_IN ${NO_LAPACK})
endif()
if (NOT DEFINED NO_LAPACKE)
set(NO_LAPACKE_IN 0)
else()
set(NO_LAPACKE_IN ${NO_LAPACKE})
endif()
if (NOT DEFINED NEED2UNDERSCORES)
set(NEED2UNDERSCORES_IN 0)
else()
set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES})
endif()
if (NOT DEFINED ONLY_CBLAS)
set(ONLY_CBLAS_IN 0)
else()
set(ONLY_CBLAS_IN ${ONLY_CBLAS})
endif()
if (NOT DEFINED BU)
set(BU _)
endif()
# Report which decorations will be applied to the exported symbol names.
# The expansions are quoted on purpose: with an empty or unset SYMBOLPREFIX /
# SYMBOLSUFFIX an unquoted ${...} disappears entirely and the condition
# degenerates to `if (NOT STREQUAL "")`, which only yields the intended result
# by accident. Quoting keeps a well-formed `"" STREQUAL ""` comparison.
if (NOT "${SYMBOLPREFIX}" STREQUAL "")
  message(STATUS "adding prefix ${SYMBOLPREFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
endif()
if (NOT "${SYMBOLSUFFIX}" STREQUAL "")
  message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
endif()
add_custom_command(TARGET ${OpenBLAS_LIBNAME} POST_BUILD
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
COMMENT "renaming symbols"
)
endif()
# Install project
# Install libraries
# When both library flavours are built, the _shared and _static targets are
# installed separately; only the _shared target is added to the
# "OpenBLAS${SUFFIX64}Targets" export set here. In every other configuration
# the target(s) listed in ${OpenBLAS_LIBS} are installed and exported.
if(BUILD_SHARED_LIBS AND BUILD_STATIC_LIBS)
install(TARGETS ${OpenBLAS_LIBNAME}_shared
EXPORT "OpenBLAS${SUFFIX64}Targets"
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
# Static variant: installed without an EXPORT clause.
install(TARGETS ${OpenBLAS_LIBNAME}_static
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
else()
install(TARGETS ${OpenBLAS_LIBS}
EXPORT "OpenBLAS${SUFFIX64}Targets"
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
endif()
install(TARGETS ${OpenBLAS_LIBNAME}
EXPORT "OpenBLAS${SUFFIX64}Targets"
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
# Install headers
set(CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
@ -586,50 +346,37 @@ if(NOT NOFORTRAN)
endif()
if(NOT NO_CBLAS)
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
if (NOT ${SYMBOLPREFIX} STREQUAL "")
string(REPLACE " cblas" " ${SYMBOLPREFIX}cblas" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REPLACE " openblas" " ${SYMBOLPREFIX}openblas" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
string (REPLACE " ${SYMBOLPREFIX}openblas_complex" " openblas_complex" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REPLACE " goto" " ${SYMBOLPREFIX}goto" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
endif()
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
string(REGEX REPLACE "(cblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REGEX REPLACE "(openblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
string(REGEX REPLACE "(openblas_complex[^ ]*)${SYMBOLSUFFIX}" "\\1" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REGEX REPLACE "(goto[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
endif()
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
if(NOT NO_LAPACKE)
message (STATUS "Copying LAPACKE header files to ${CMAKE_INSTALL_INCLUDEDIR}")
if(BUILD_STATIC_LIBS)
add_dependencies( ${OpenBLAS_LIBNAME}_static genlapacke)
endif()
if(BUILD_SHARED_LIBS)
add_dependencies( ${OpenBLAS_LIBNAME}_shared genlapacke)
endif()
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/*.h")
install (FILES ${INCLUDE_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
message (STATUS "Copying LAPACKE header files to ${CMAKE_INSTALL_INCLUDEDIR}")
add_dependencies( ${OpenBLAS_LIBNAME} genlapacke)
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/*.h")
install (FILES ${INCLUDE_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
ADD_CUSTOM_TARGET(genlapacke
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
)
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
ADD_CUSTOM_TARGET(genlapacke
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
)
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
endif()
# Install pkg-config files
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
include(FindPkgConfig QUIET)
if(PKG_CONFIG_FOUND)
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
endif()
# GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".
set(PN OpenBLAS)
set(CMAKECONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PN}${SUFFIX64}")
set(CMAKECONFIG_INSTALL_DIR "share/cmake/${PN}${SUFFIX64}")
configure_package_config_file(cmake/${PN}Config.cmake.in
"${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake"
INSTALL_DESTINATION ${CMAKECONFIG_INSTALL_DIR})
@ -644,3 +391,4 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
install(EXPORT "${PN}${SUFFIX64}Targets"
NAMESPACE "${PN}${SUFFIX64}::"
DESTINATION ${CMAKECONFIG_INSTALL_DIR})

View File

@ -23,9 +23,6 @@
* Optimization on AMD Piledriver
* Optimization on Intel Haswell
* Chris Sidebottom <chris.sidebottom@arm.com>
* Optimizations and other improvements targeting AArch64
## Previous Developers
* Zaheer Chothia <zaheer.chothia@gmail.com>
@ -183,49 +180,3 @@ In chronological order:
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels
* [2020-01-07] optimize AVX2 SGEMM and STRMM
* Rajalakshmi Srinivasaraghavan <https://github.com/RajalakshmiSR>
* [2020-04-15] Half-precision GEMM for bfloat16
* Marius Hillenbrand <https://github.com/mhillenibm>
* [2020-05-12] Revise dynamic architecture detection for IBM z
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support
* Danfeng Zhang <https://github.com/craft-zhang>
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
* PingTouGe Semiconductor Co., Ltd.
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910
* Jake Arkinstall <https://github.com/jake-arkinstall>
* [2021-02-10] Remove in-source configure_file to enable builds in read-only contexts (issue #3100, PR #3101)
* River Dillon <oss@outerpassage.net>
* [2021-07-10] fix compilation with musl libc
* Bine Brank <https://github.com/binebrank>
* [2021-10-27] Add vector-length-agnostic DGEMM kernels for Arm SVE
* [2021-11-20] Vector-length-agnostic Arm SVE copy routines for DGEMM, DTRMM, DSYMM
* [2021-11-12] SVE kernels for SGEMM, STRMM and corresponding SVE copy functions
* [2022-01-06] SVE kernels for CGEMM, ZGEMM, CTRMM, ZTRMM and corresponding SVE copy functions
* [2022-01-18] SVE kernels and copy functions for TRSM
* Ilya Kurdyukov <https://github.com/ilyakurdyukov>
* [2021-02-21] Add basic support for the Elbrus E2000 architecture
* PLCT Lab, Institute of Software Chinese Academy of Sciences
* [2022-03] Support RISC-V Vector Intrinsic 1.0 version.
* Pablo Romero <https://github.com/pablorcum>
* [2022-08] Fix building from sources for QNX
* Mark Seminatore <https://github.com/mseminatore>
* [2023-11-09] Improve Windows threading performance scaling
* [2024-02-09] Introduce MT_TRACE facility and improve code consistency
* Dirreke <https://github.com/Dirreke>
* [2024-01-16] Add basic support for the CSKY architecture
* Christopher Daley <https://github.com/cdaley>
* [2024-01-24] Optimize GEMV forwarding on ARM64 systems

File diff suppressed because it is too large Load Diff

View File

@ -80,7 +80,7 @@
SUN
Fujitsu
4. Supported precision
4. Suported precision
Now x86/x86_64 versions support 80bit FP precision in addition to
normal double precision and single precision. Currently only

View File

@ -11,7 +11,7 @@
operation is finished.
2. Similar problem may happen under virtual machine. If supervisor
2. Simlar problem may happen under virtual machine. If supervisor
allocates different cores for each scheduling, BLAS performance
will be bad. This is because BLAS also utilizes all cache,
unexpected re-schedule for different core may result in heavy

14
Jenkinsfile vendored
View File

@ -1,14 +0,0 @@
// Declarative Jenkins pipeline with a single 'Build' stage that runs inside
// the 'osuosl/ubuntu-s390x' Docker image.
pipeline {
agent {
docker {
image 'osuosl/ubuntu-s390x'
}
}
stages {
stage('Build') {
steps {
// Clean first so the build does not reuse stale objects, then run the
// default make target.
sh 'make clean && make'
}
}
}
}

View File

@ -1,16 +0,0 @@
// Declarative Jenkins pipeline with a single 'Build' stage that runs inside
// the 'osuosl/ubuntu-ppc64le:18.04' Docker image.
pipeline {
agent {
docker {
image 'osuosl/ubuntu-ppc64le:18.04'
}
}
stages {
stage('Build') {
steps {
// Refresh the package index and install gfortran before building.
sh 'sudo apt update'
sh 'sudo apt install gfortran -y'
// Clean first so the build does not reuse stale objects, then run the
// default make target.
sh 'make clean && make'
}
}
}
}

166
Makefile
View File

@ -1,9 +1,5 @@
TOPDIR = .
include ./Makefile.system
LNCMD = ln -fs
ifeq ($(FIXED_LIBNAME), 1)
LNCMD = true
endif
BLASDIRS = interface driver/level2 driver/level3 driver/others
@ -29,32 +25,21 @@ ifeq ($(NO_FORTRAN), 1)
define NOFORTRAN
1
endef
ifneq ($(NO_LAPACK), 1)
define C_LAPACK
define NO_LAPACK
1
endef
endif
export NOFORTRAN
export NO_LAPACK
export C_LAPACK
endif
ifeq ($(F_COMPILER),CRAY)
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -Og -Os,$(LAPACK_FFLAGS))
else
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS))
endif
ifdef LAPACK_STRLEN
LAPACK_FFLAGS += -DLAPACK_STRLEN=$(LAPACK_STRLEN)
endif
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test
.PHONY : all libs netlib $(RELA) test ctest shared install
.NOTPARALLEL : shared
.NOTPARALLEL : all libs $(RELA) prof lapack-test install blas-test
all :: tests
all :: libs netlib $(RELA) tests shared
@echo
@echo " OpenBLAS build complete. ($(LIB_COMPONENTS))"
@echo
@ -71,27 +56,10 @@ ifneq ($(INTERFACE64), 0)
@echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) "
endif
endif
@$(CC) --version > /dev/null 2>&1;\
if [ $$? -eq 0 ]; then \
cverinfo=`$(CC) --version | sed -n '1p'`; \
if [ -z "$${cverinfo}" ]; then \
cverinfo=`$(CC) --version | sed -n '2p'`; \
fi; \
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
else \
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
fi
@echo " C compiler ... $(C_COMPILER) (command line : $(CC))"
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
@$(FC) --version > /dev/null 2>&1;\
if [ $$? -eq 0 ]; then \
fverinfo=`$(FC) --version | sed -n '1p'`; \
if [ -z "$${fverinfo}" ]; then \
fverinfo=`$(FC) --version | sed -n '2p'`; \
fi; \
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
else \
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\
fi
@echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))"
endif
ifneq ($(OSNAME), AIX)
@echo -n " Library Name ... $(LIBNAME)"
@ -100,13 +68,9 @@ else
endif
ifndef SMP
@echo " (Single-threading) "
@echo " (Single threaded) "
else
@echo " (Multi-threading; Max num-threads is $(NUM_THREADS))"
endif
ifeq ($(DYNAMIC_ARCH), 1)
@echo " Supporting multiple $(ARCH) cpu models with minimum requirement for the common code being $(CORE)"
@echo " (Multi threaded; Max num-threads is $(NUM_THREADS))"
endif
ifeq ($(USE_OPENMP), 1)
@ -122,10 +86,6 @@ ifeq ($(OSNAME), Darwin)
@echo "\"make PREFIX=/your_installation_path/ install\"."
@echo
@echo "(or set PREFIX in Makefile.rule and run make install."
@echo
@echo "Note that any flags passed to make during build should also be passed to make install"
@echo "to circumvent any install errors."
@echo
@echo "If you want to move the .dylib to a new location later, make sure you change"
@echo "the internal name of the dylib with:"
@echo
@ -134,25 +94,22 @@ endif
@echo
@echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"."
@echo
@echo "Note that any flags passed to make during build should also be passed to make install"
@echo "to circumvent any install errors."
@echo
shared : libs netlib $(RELA)
shared :
ifneq ($(NO_SHARED), 1)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
@$(MAKE) -C exports so
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
@$(MAKE) -C exports so
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), Darwin)
@$(MAKE) -C exports dyn
@$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).dylib
@$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
endif
ifeq ($(OSNAME), WINNT)
@$(MAKE) -C exports dll
@ -160,42 +117,36 @@ endif
ifeq ($(OSNAME), CYGWIN_NT)
@$(MAKE) -C exports dll
endif
ifeq ($(OSNAME), AIX)
@$(MAKE) -C exports so
endif
endif
tests : shared
tests :
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
touch $(LIBNAME)
ifndef NO_FBLAS
$(MAKE) -C test all
endif
endif
ifneq ($(ONLY_CBLAS), 1)
$(MAKE) -C utest all
endif
ifneq ($(NO_CBLAS), 1)
ifneq ($(ONLY_CBLAS), 1)
ifndef NO_CBLAS
$(MAKE) -C ctest all
endif
ifeq ($(CPP_THREAD_SAFETY_TEST), 1)
$(MAKE) -C cpp_thread_test all
endif
endif
endif
libs :
ifeq ($(CORE), UNKNOWN)
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
endif
ifeq ($(NOFORTRAN), 1)
$(info OpenBLAS: Detecting fortran compiler failed. Can only compile BLAS and f2c-converted LAPACK.)
$(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.)
endif
ifeq ($(NO_STATIC), 1)
ifeq ($(NO_SHARED), 1)
$(error OpenBLAS: neither static nor shared are enabled.)
endif
endif
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
@for d in $(SUBDIRS) ; \
do if test -d $$d; then \
$(MAKE) -C $$d $(@F) || exit 1 ; \
@ -221,32 +172,15 @@ ifeq ($(DYNAMIC_OLDER), 1)
@echo DYNAMIC_OLDER=1 >> Makefile.conf_last
endif
endif
@echo TARGET=$(CORE) >> Makefile.conf_last
ifdef USE_THREAD
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
endif
ifdef SMP
ifdef NUM_THREADS
@echo NUM_THREADS=$(NUM_THREADS) >> Makefile.conf_last
else
@echo NUM_THREADS=$(NUM_CORES) >> Makefile.conf_last
endif
endif
ifeq ($(USE_OPENMP),1)
@echo USE_OPENMP=1 >> Makefile.conf_last
endif
ifeq ($(INTERFACE64),1)
@echo INTERFACE64=1 >> Makefile.conf_last
endif
@echo THELIBNAME=$(LIBNAME) >> Makefile.conf_last
@echo THELIBSONAME=$(LIBSONAME) >> Makefile.conf_last
@-$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
@touch lib.grd
prof : prof_blas prof_lapack
prof_blas :
$(LNCMD) $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
for d in $(SUBDIRS) ; \
do if test -d $$d; then \
$(MAKE) -C $$d prof || exit 1 ; \
@ -257,7 +191,7 @@ ifeq ($(DYNAMIC_ARCH), 1)
endif
blas :
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
for d in $(BLASDIRS) ; \
do if test -d $$d; then \
$(MAKE) -C $$d libs || exit 1 ; \
@ -265,7 +199,7 @@ blas :
done
hpl :
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
for d in $(BLASDIRS) ../laswp exports ; \
do if test -d $$d; then \
$(MAKE) -C $$d $(@F) || exit 1 ; \
@ -279,21 +213,26 @@ ifeq ($(DYNAMIC_ARCH), 1)
endif
hpl_p :
$(LNCMD) $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
for d in $(SUBDIRS) ../laswp exports ; \
do if test -d $$d; then \
$(MAKE) -C $$d $(@F) || exit 1 ; \
fi; \
done
ifeq ($(NO_LAPACK), 1)
netlib :
else
netlib : lapack_prebuild
ifneq ($(NO_LAPACK), 1)
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
endif
ifneq ($(NO_LAPACKE), 1)
ifndef NO_LAPACKE
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
endif
endif
ifeq ($(NO_LAPACK), 1)
re_lapack :
@ -307,26 +246,13 @@ prof_lapack : lapack_prebuild
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof
lapack_prebuild :
ifeq ($(NO_LAPACK), $(filter 0,$(NO_LAPACK)))
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(F_COMPILER), GFORTRAN)
-@echo "override FFLAGS = $(LAPACK_FFLAGS) -fno-tree-vectorize" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "override FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGGFORTRAN1)
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc
else
ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGIBM1)
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
endif
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
@ -359,18 +285,6 @@ else
endif
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_SINGLE), 1)
-@echo "BUILD_SINGLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_DOUBLE), 1)
-@echo "BUILD_DOUBLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_COMPLEX), 1)
-@echo "BUILD_COMPLEX = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_COMPLEX16), 1)
-@echo "BUILD_COMPLEX16 = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
@ -403,20 +317,19 @@ lapack-test :
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
ifneq ($(CROSS), 1)
( cd $(NETLIB_LAPACK_DIR)/INSTALL; $(MAKE) all; ./testlsame; ./testslamch; ./testdlamch; \
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING)
endif
lapack-runtest: lapack-test
lapack-runtest:
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
blas-test:
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out)
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out)
@ -436,12 +349,11 @@ clean ::
@$(MAKE) -C kernel clean
#endif
@$(MAKE) -C reference clean
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h
ifeq ($(OSNAME), Darwin)
@rm -rf getarch.dSYM getarch_2nd.dSYM
endif
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
@rm -f cblas.tmp cblas.tmp2
@touch $(NETLIB_LAPACK_DIR)/make.inc
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h

View File

@ -1,24 +1,42 @@
CPP = $(CC) -E
RANLIB = ranlib
ifeq ($(LIBSUBARCH), EV4)
LIBNAME = $(LIBPREFIX)_ev4.a
LIBNAME_P = $(LIBPREFIX)_ev4_p.a
endif
ifeq ($(LIBSUBARCH), EV5)
LIBNAME = $(LIBPREFIX)_ev5.a
LIBNAME_P = $(LIBPREFIX)_ev5_p.a
endif
ifeq ($(LIBSUBARCH), EV6)
LIBNAME = $(LIBPREFIX)_ev6.a
LIBNAME_P = $(LIBPREFIX)_ev6_p.a
endif
ifneq ($(COMPILER), NATIVE)
# GCC User
ifeq ($(CORE), EV4)
CCOMMON_OPT += -mcpu=ev4
ifeq ($(LIBSUBARCH), EV4)
OPTION += -DEV4 -mcpu=ev4
endif
ifeq ($(CORE), EV5)
CCOMMON_OPT += -mcpu=ev5
ifeq ($(LIBSUBARCH), EV5)
OPTION += -DEV5 -mcpu=ev5
endif
ifeq ($(CORE), EV6)
CCOMMON_OPT += -mcpu=ev6
ifeq ($(LIBSUBARCH), EV6)
OPTION += -DEV6 -mcpu=ev6
endif
else
# Compaq Compiler User
ifeq ($(CORE), EV4)
CCOMMON_OPT += -tune ev4 -arch ev4
ifeq ($(LIBSUBARCH), EV4)
OPTION += -DEV4 -tune ev4 -arch ev4
endif
ifeq ($(CORE), EV5)
CCOMMON_OPT += -tune ev5 -arch ev5
ifeq ($(LIBSUBARCH), EV5)
OPTION += -DEV5 -tune ev5 -arch ev5
endif
ifeq ($(CORE), EV6)
CCOMMON_OPT += -tune ev6 -arch ev6
ifeq ($(LIBSUBARCH), EV6)
OPTION += -DEV6 -tune ev6 -arch ev6
endif
endif

View File

@ -12,8 +12,3 @@ ifeq ($(CORE), ARMV6)
CCOMMON_OPT += -mfpu=vfp
FCOMMON_OPT += -mfpu=vfp
endif
# When HAVE_NEON is defined, append -mfpu=neon to both the C and the Fortran
# common compiler options.
ifdef HAVE_NEON
CCOMMON_OPT += -mfpu=neon
FCOMMON_OPT += -mfpu=neon
endif

View File

@ -1,354 +1,48 @@
ifneq ($(C_COMPILER), PGI)
ifeq ($(C_COMPILER), CLANG)
ISCLANG=1
endif
ifeq ($(C_COMPILER), FUJITSU)
ISCLANG=1
endif
ifneq (1, $(filter 1,$(GCCVERSIONGT4) $(ISCLANG)))
CCOMMON_OPT += -march=armv8-a
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a
endif
else
ifeq ($(CORE), ARMV8)
CCOMMON_OPT += -march=armv8-a
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a
endif
endif
ifeq ($(CORE), ARMV8SVE)
CCOMMON_OPT += -march=armv8-a+sve
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a+sve
endif
endif
ifeq ($(CORE), CORTEXA53)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
endif
endif
ifeq ($(CORE), CORTEXA57)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
endif
endif
ifeq ($(CORE), CORTEXA72)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif
ifeq ($(CORE), CORTEXA73)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
endif
endif
ifeq ($(CORE), CORTEXA76)
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
endif
endif
ifeq ($(CORE), FT2000)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif
# Use a72 tunings because Neoverse-N1 is only available
# in GCC>=9
ifeq ($(CORE), NEOVERSEN1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
endif
else
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
else
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif
endif
# Use a72 tunings because Neoverse-V1 is only available
# in GCC>=10.4
ifeq ($(CORE), NEOVERSEV1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.4-a+sve
ifeq (1, $(ISCLANG))
CCOMMON_OPT += -mtune=cortex-x1
else
CCOMMON_OPT += -mtune=neoverse-v1
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
endif
else
CCOMMON_OPT += -march=armv8.4-a+sve
ifneq ($(CROSS), 1)
CCOMMON_OPT += -mtune=native
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a
ifneq ($(CROSS), 1)
FCOMMON_OPT += -mtune=native
endif
endif
endif
else
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
else
CCOMMON_OPT += -march=armv8-a+sve -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif
endif
# Use a72 tunings because Neoverse-N2 is only available
# in GCC>=10.4
ifeq ($(CORE), NEOVERSEN2)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
ifneq ($(OSNAME), Darwin)
CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
else
CCOMMON_OPT += -march=armv8.2-a+sve+bf16 -mtune=cortex-a72
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
endif
else
CCOMMON_OPT += -march=armv8.5-a+sve+bf16
ifneq ($(CROSS), 1)
CCOMMON_OPT += -mtune=native
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.5-a
ifneq ($(CROSS), 1)
FCOMMON_OPT += -mtune=native
endif
endif
endif
else
CCOMMON_OPT += -march=armv8.2-a+sve+bf16 -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
else
CCOMMON_OPT += -march=armv8-a+sve+bf16 -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif
endif
# Detect ARM Neoverse V2.
ifeq ($(CORE), NEOVERSEV2)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
CCOMMON_OPT += -march=armv9-a -mtune=neoverse-v2
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv9-a -mtune=neoverse-v2
endif
endif
endif
# Use a53 tunings because a55 is only available in GCC>=8.1
ifeq ($(CORE), CORTEXA55)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ8) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
endif
else
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
endif
endif
else
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
endif
endif
endif
ifeq ($(CORE), THUNDERX)
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
endif
endif
ifeq ($(CORE), FALKOR)
CCOMMON_OPT += -march=armv8-a -mtune=falkor
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=falkor
endif
endif
ifeq ($(CORE), THUNDERX2T99)
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif
endif
ifeq ($(CORE), THUNDERX3T110)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.3-a
ifeq (0, $(ISCLANG))
CCOMMON_OPT += -mtune=thunderx3t110
else
CCOMMON_OPT += -mtune=thunderx2t99
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
endif
else
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif
endif
endif
ifeq ($(CORE), VORTEX)
CCOMMON_OPT += -march=armv8.3-a
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.3-a
endif
endif
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
ifeq ($(GCCVERSIONGTEQ9), 1)
ifeq ($(CORE), TSV110)
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
endif
endif
endif
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
ifeq ($(CORE), EMAG8180)
CCOMMON_OPT += -march=armv8-a
ifeq ($(ISCLANG), 0)
CCOMMON_OPT += -mtune=emag
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=emag
endif
endif
endif
ifeq ($(CORE), A64FX)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ3) $(GCCVERSIONGTEQ11) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=a64fx
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a+sve -mtune=a64fx
endif
else
CCOMMON_OPT += -march=armv8.4-a+sve -mtune=neoverse-n1
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-n1
endif
endif
endif
endif
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXX1)
CCOMMON_OPT += -march=armv8.2-a
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ12) $(ISCLANG)))
CCOMMON_OPT += -mtune=cortex-x1
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-x1
endif
else
CCOMMON_OPT += -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
endif
endif
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXX2)
CCOMMON_OPT += -march=armv8.4-a+sve
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a+sve
endif
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
CCOMMON_OPT += -mtune=cortex-x2
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -mtune=cortex-x2
endif
endif
endif
endif
# CORTEXA510 core: -march=armv8.4-a+sve for C, and for Fortran unless NAG.
# No -mtune is added for this core.
# The commented-out line below is a disabled variant of the guard that
# checks ISCLANG only; the active guard also accepts GCCVERSIONGTEQ11.
#ifeq (1, $(filter 1,$(ISCLANG)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXA510)
CCOMMON_OPT += -march=armv8.4-a+sve
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a+sve
endif
endif
endif
# CORTEXA710 core, only when GCCVERSIONGTEQ11 or ISCLANG is 1.
# Both C and Fortran (unless NAG) get -march=armv8.4-a+sve.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXA710)
CCOMMON_OPT += -march=armv8.4-a+sve
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a+sve
endif
# -mtune=cortex-a710 is added only when GCCVERSIONGTEQ12 or ISCLANG is 1;
# otherwise no -mtune is passed for this core.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
CCOMMON_OPT += -mtune=cortex-a710
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -mtune=cortex-a710
endif
endif
endif
endif
endif
endif

View File

@ -1,4 +0,0 @@
# CK860FV core: select the ck860v architecture / ck860fv CPU with hard-float.
# The Fortran flags repeat the C flags and additionally pass -static.
ifeq ($(CORE), CK860FV)
CCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
FCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float -static
endif

View File

@ -1 +0,0 @@
COPT = -Wall -O2 # -DGEMMTEST

View File

@ -2,21 +2,6 @@ TOPDIR = .
export GOTOBLAS_MAKEFILE = 1
-include $(TOPDIR)/Makefile.conf_last
include ./Makefile.system
LNCMD = ln -fs
ifdef THELIBNAME
LIBNAME=$(THELIBNAME)
LIBSONAME=$(THELIBSONAME)
endif
ifeq ($(FIXED_LIBNAME), 1)
LNCMD = true
endif
ifeq ($(INTERFACE64),1)
USE_64BITINT=1
endif
ifeq ($(USE_OPENMP),1)
FOMP_OPT:= -fopenmp
endif
PREFIX ?= /opt/OpenBLAS
@ -24,23 +9,10 @@ OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
OPENBLAS_BUILD_DIR := $(CURDIR)
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/$(LIBSONAMEBASE)
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
PKG_EXTRALIB := $(EXTRALIB)
ifeq ($(INTERFACE64),1)
SUFFIX64=64
endif
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"
ifeq ($(USE_OPENMP), 1)
ifeq ($(C_COMPILER), PGI)
PKG_EXTRALIB += -lomp
else
PKG_EXTRALIB += -lgomp
endif
endif
.PHONY : install
.NOTPARALLEL : install
@ -73,62 +45,47 @@ install : lib.grd
ifndef NO_CBLAS
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@cp cblas.h cblas.tmp
ifdef SYMBOLPREFIX
@sed 's/cblas[^() ]*/$(SYMBOLPREFIX)&/g' cblas.tmp > cblas.tmp2
@sed 's/openblas[^() ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
#change back any openblas_complex_float and double that got hit
@sed 's/$(SYMBOLPREFIX)openblas_complex_/openblas_complex_/g' cblas.tmp > cblas.tmp2
@sed 's/goto[^() ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
endif
ifdef SYMBOLSUFFIX
@sed 's/cblas[^() ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp > cblas.tmp2
@sed 's/openblas[^() ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
#change back any openblas_complex_float and double that got hit
@sed 's/\(openblas_complex_\)\([^ ]*\)$(SYMBOLSUFFIX)/\1\2 /g' cblas.tmp > cblas.tmp2
@sed 's/goto[^() ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
endif
@sed 's/common/openblas_config/g' cblas.tmp > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
endif
ifneq ($(OSNAME), AIX)
ifneq ($(NO_LAPACKE), 1)
ifndef NO_LAPACKE
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
endif
#for install static library
ifneq ($(NO_STATIC),1)
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@install -m644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@install -pm644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif
#for install shared library
ifneq ($(NO_SHARED),1)
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
@install -m755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so ; \
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), Darwin)
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@-install_name_tool -id "$(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).$(MAJOR_VERSION).dylib" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
@-install_name_tool -id "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).dylib ; \
$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib ; \
ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
endif
ifeq ($(OSNAME), WINNT)
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
@ -142,7 +99,7 @@ endif
else
#install on AIX has different options syntax
ifneq ($(NO_LAPACKE), 1)
ifndef NO_LAPACKE
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
@ -156,36 +113,27 @@ ifneq ($(NO_STATIC),1)
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif
#for install shared library
ifneq ($(NO_SHARED),1)
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so ; \
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
endif
#Generating openblas.pc
ifeq ($(INTERFACE64),1)
SUFFIX64=64
endif
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"
@echo Generating $(LIBSONAMEBASE)$(SUFFIX64).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(PKGFILE)"
@echo 'libprefix='$(LIBNAMEPREFIX) >> "$(PKGFILE)"
@echo 'libnamesuffix='$(LIBNAMESUFFIX) >> "$(PKGFILE)"
@echo 'libsuffix='$(SYMBOLSUFFIX) >> "$(PKGFILE)"
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(PKGFILE)"
@echo 'omp_opt='$(FOMP_OPT) >> "$(PKGFILE)"
@echo 'openblas_config= USE_64BITINT='$(INTERFACE64) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(TARGET) 'MAX_THREADS='$(NUM_THREADS)>> "$(PKGFILE)"
@echo 'version='$(VERSION) >> "$(PKGFILE)"
@echo 'extralib='$(PKG_EXTRALIB) >> "$(PKGFILE)"
@cat openblas.pc.in >> "$(PKGFILE)"
@echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'extralib='$(EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
#Generating OpenBLASConfig.cmake
@ -196,7 +144,7 @@ endif
ifneq ($(NO_SHARED),1)
#ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX)$(SYMBOLSUFFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@ -220,3 +168,4 @@ endif
@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo Install OK!

View File

@ -1,3 +0,0 @@
ifdef BINARY64
else
endif

View File

@ -1,4 +1,3 @@
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs
ifdef BINARY64
else
endif

View File

@ -1,4 +1,3 @@
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs
ifdef BINARY64
else
endif

View File

@ -9,110 +9,26 @@ else
USE_OPENMP = 1
endif
# POWER10 core: C and Fortran flags for every C compiler except PGI
# (nothing is added in this stanza when C_COMPILER is PGI).
ifeq ($(CORE), POWER10)
ifneq ($(C_COMPILER), PGI)
ifeq ($(C_COMPILER), GCC)
# With GCC the -mcpu/-mtune level steps down with the compiler version:
# power10 when GCCVERSIONGTEQ10 is 1, power8 when GCCVERSIONGT4 is not 1,
# and power9 in between. Every variant passes -fno-fast-math after -Ofast.
ifeq ($(GCCVERSIONGTEQ10), 1)
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
# NOTE(review): the warning in this branch names POWER9 although this is the
# POWER10 stanza -- confirm the wording is intended.
else ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
else
$(warning your compiler is too old to fully support POWER10, getting a newer version of gcc is recommended)
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
endif
else
# Any other non-PGI C compiler gets the power10 flags unconditionally.
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
endif
# Fortran: IBM-style -q options when F_COMPILER is IBM, GCC-style otherwise.
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
endif
endif
endif
ifeq ($(CORE), POWER9)
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
ifeq ($(C_COMPILER), GCC)
ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
CCOMMON_OPT += -mcpu=power8 -mtune=power8
ifeq ($(USE_OPENMP), 1)
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
else
CCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
CCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
endif
ifneq ($(F_COMPILER), PGI)
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr9 -qtune=pwr9 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O2 -frecursive -fno-fast-math -mcpu=power9 -mtune=power9
endif
ifeq ($(F_COMPILER), GFORTRAN)
ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
FCOMMON_OPT += -mcpu=power8 -mtune=power8
else
FCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
endif
else
FCOMMON_OPT += -O2 -Mrecursive
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math
endif
endif
ifeq ($(CORE), POWER8)
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
else
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
endif
ifneq ($(F_COMPILER), PGI)
ifeq ($(OSNAME), AIX)
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
endif
else
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
endif
endif
else
FCOMMON_OPT += -O2 -Mrecursive
endif
endif
ifeq ($(USE_OPENMP), 1)
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -DUSE_OPENMP -fopenmp
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
else
CCOMMON_OPT += -DUSE_OPENMP -mp
endif
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -DUSE_OPENMP
else
ifneq ($(F_COMPILER), PGI)
FCOMMON_OPT += -DUSE_OPENMP -fopenmp
else
FCOMMON_OPT += -DUSE_OPENMP -mp
endif
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
endif
endif
ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -fno-integrated-as
endif
# workaround for C->FORTRAN ABI violation in LAPACKE
ifeq ($(F_COMPILER), GFORTRAN)
FCOMMON_OPT += -fno-optimize-sibling-calls
@ -147,25 +63,11 @@ endif
ifdef BINARY64
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), GCCIBMAIX)
$(error Using GCC and XLF on AIX is not a supported combination.)
endif
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), CLANGGFORTRANAIX)
$(error Using Clang and gFortran on AIX is not a supported combination.)
endif
ifeq ($(OSNAME), AIX)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mpowerpc64 -maix64
else
CCOMMON_OPT += -m64
endif
ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mpowerpc64 -maix64
endif
ifeq ($(F_COMPILER), GFORTRAN)
FCOMMON_OPT += -mpowerpc64 -maix64
endif
ifeq ($(COMPILER_F77), xlf)
FCOMMON_OPT += -q64
endif

View File

@ -3,10 +3,6 @@
export BINARY
export USE_OPENMP
ifdef DYNAMIC_ARCH
override HOST_CFLAGS += -DDYNAMIC_ARCH
endif
ifdef TARGET_CORE
TARGET_MAKE = Makefile_kernel.conf
TARGET_CONF = config_kernel.h
@ -15,23 +11,13 @@ TARGET_MAKE = Makefile.conf
TARGET_CONF = config.h
endif
ifdef USE_PERL
SCRIPTSUFFIX = .pl
else
SCRIPTSUFFIX =
endif
# CPUIDEMU = ../../cpuid/table.o
ifdef CPUIDEMU
EXFLAGS = -DCPUIDEMU -DVENDOR=99
endif
ifeq ($(TARGET), MIPS24K)
TARGET_FLAGS = -mips32r2
endif
ifeq ($(TARGET), MIPS1004K)
ifeq ($(TARGET), 1004K)
TARGET_FLAGS = -mips32r2
endif
@ -51,38 +37,14 @@ ifeq ($(TARGET), I6500)
TARGET_FLAGS = -mips64r6
endif
ifeq ($(TARGET), C910V)
TARGET_FLAGS = -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d
endif
ifeq ($(TARGET), CK860FV)
TARGET_FLAGS = -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
endif
ifeq ($(TARGET), x280)
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
endif
ifeq ($(TARGET), RISCV64_ZVL256B)
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
endif
ifeq ($(TARGET), RISCV64_ZVL128B)
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
endif
ifeq ($(TARGET), RISCV64_GENERIC)
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
endif
all: getarch_2nd
./getarch_2nd 0 >> $(TARGET_MAKE)
./getarch_2nd 1 >> $(TARGET_CONF)
$(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch
./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(CC)" $(TARGET_FLAGS) $(CFLAGS)
config.h : c_check f_check getarch
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS)
ifneq ($(ONLY_CBLAS), 1)
./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(FC)" $(TARGET_FLAGS)
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS)
else
#When we only build CBLAS, we set NOFORTRAN=2
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
@ -97,17 +59,13 @@ endif
getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
avx512=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \
rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
$(HOSTCC) $(CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy
getarch_2nd : getarch_2nd.c config.h dummy
ifndef TARGET_CORE
$(HOSTCC) -I. $(HOST_CFLAGS) -o $(@F) getarch_2nd.c
$(HOSTCC) -I. $(CFLAGS) -o $(@F) getarch_2nd.c
else
$(HOSTCC) -I. $(HOST_CFLAGS) -DBUILD_KERNEL -o $(@F) getarch_2nd.c
$(HOSTCC) -I. $(CFLAGS) -DBUILD_KERNEL -o $(@F) getarch_2nd.c
endif
dummy:
.PHONY: dummy

View File

@ -1,20 +0,0 @@
# Per-core compiler flags for the RISC-V 64-bit targets. Every stanza uses
# the lp64d ABI; they differ in the -march string and a few extra options.

# C910V: vector extension v0p7 plus zfh and xtheadc, tuned for c920.
# The Fortran flags additionally pass -static.
ifeq ($(CORE), C910V)
CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920
FCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -static
endif
# x280: the C flags add _zvl512b and -ffast-math; the Fortran flags omit
# both and pass -static instead.
ifeq ($(CORE), x280)
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
endif
# RISCV64_ZVL256B: only the C flags carry the _zvl256b suffix.
ifeq ($(CORE), RISCV64_ZVL256B)
CCOMMON_OPT += -march=rv64imafdcv_zvl256b -mabi=lp64d
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
endif
# RISCV64_ZVL128B: plain rv64imafdcv for both C and Fortran (no zvl suffix).
ifeq ($(CORE), RISCV64_ZVL128B)
CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
endif
# RISCV64_GENERIC: rv64imafdc, i.e. without the vector extension letter.
ifeq ($(CORE), RISCV64_GENERIC)
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
endif

View File

@ -3,12 +3,7 @@
#
# This library's version
VERSION = 0.3.28.dev
# If you set this prefix, the library name will be lib$(LIBNAMEPREFIX)openblas.a
# and lib$(LIBNAMEPREFIX)openblas.so, with a matching soname in the shared library
#
# LIBNAMEPREFIX = scipy
VERSION = 0.3.9.dev
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@ -134,17 +129,8 @@ VERSION = 0.3.28.dev
# Build LAPACK Deprecated functions since LAPACK 3.6.0
BUILD_LAPACK_DEPRECATED = 1
# The variable type assumed for the length of character arguments when passing
# data between Fortran LAPACK and C BLAS (defaults to "size_t", but older GCC
# versions used "int"). Mismatches will not cause runtime failures but may result
# in build warnings or errors when building with link-time optimization (LTO)
# LAPACK_STRLEN=int
# Build RecursiveLAPACK on top of LAPACK
# BUILD_RELAPACK = 1
# Have RecursiveLAPACK actually replace standard LAPACK routines instead of
# just adding its equivalents with a RELAPACK_ prefix
# RELAPACK_REPLACE = 1
# If you want to use the legacy threaded Level 3 implementation.
# USE_SIMPLE_THREADED_LEVEL3 = 1
@ -179,10 +165,6 @@ NO_AFFINITY = 1
# If you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
# BIGNUMA = 1
# If you are compiling for an embedded system ("bare metal") like Cortex M series
# Note that you will have to provide implementations of malloc() and free() in this case
# EMBEDDED = 1
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
# and OS. However, the performance is low.
# NO_AVX = 1
@ -225,17 +207,7 @@ NO_AFFINITY = 1
# to the user space. If bigphysarea is enabled, it will use it.
# DEVICEDRIVER_ALLOCATION = 1
# Use large page allocation (called hugepage support in Linux context)
# for the thread buffers (with access by shared memory operations)
# HUGETLB_ALLOCATION = 1
# Use large page allocation (called hugepages in Linux) based on mmap accessing
# a memory-backed pseudofile (requires hugetlbfs to be mounted in the system,
# the example below has it mounted on /hugepages. OpenBLAS will create the backing
# file as gotoblas.processid in that path)
# HUGETLBFILE_ALLOCATION = /hugepages
# If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only).
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
# CONSISTENT_FPCSR = 1
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
@ -300,36 +272,7 @@ COMMON_PROF = -pg
# work at all.
#
# CPP_THREAD_SAFETY_TEST = 1
#
# use this to run only the less memory-hungry GEMV test
# CPP_THREAD_SAFETY_GEMV = 1
# If you want to enable the experimental BFLOAT16 support
# BUILD_BFLOAT16 = 1
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
# will be allocated on the heap rather than the stack. (This array alone requires
# NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu
# counts, but obviously it is not the only item that ends up on the stack.)
# The default value of 32 ensures that the overall requirement is compatible
# with the default 1MB stacksize imposed by having the Java VM loaded without use
# of its -Xss parameter.
# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible
# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the java
# VM e.g. in Octave or with the java-based libhdfs in numpy or scipy code
# BLAS3_MEM_ALLOC_THRESHOLD = 160
# By default the library contains BLAS functions (and LAPACK if selected) for all input types.
# To build a smaller library supporting e.g. only single precision real (SGEMM etc.) or only
# the functions for complex numbers, uncomment the desired type(s) below
# BUILD_SINGLE = 1
# BUILD_DOUBLE = 1
# BUILD_COMPLEX = 1
# BUILD_COMPLEX16 = 1
#
# End of user configuration
#

View File

@ -3,29 +3,21 @@ RANLIB = ranlib
ifdef BINARY64
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mcpu=v9 -m64
else
CCOMMON_OPT += -m64
endif
ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mcpu=v9 -m64
endif
ifeq ($(COMPILER_F77), f95)
FCOMMON_OPT += -m64
ifeq ($(COMPILER_F77), f90)
FCOMMON_OPT += -xarch=v9
endif
else
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mcpu=v9
else
CCOMMON_OPT += -xarch=v9
endif
ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mcpu=v9
endif
ifeq ($(COMPILER_F77), f95)
ifeq ($(COMPILER_F77), f90)
FCOMMON_OPT += -xarch=v8plusb
endif
@ -45,4 +37,4 @@ LIBSUNPERF = -L/opt/SUNWspro/lib/v9 -L/opt/SUNWspro/prod/lib/v9 \
else
LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \
-Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath
endif
endif

File diff suppressed because it is too large Load Diff

View File

@ -1,18 +1,16 @@
SBBLASOBJS_P = $(SBBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
DBLASOBJS_P = $(DBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
SBEXTOBJS_P = $(SBEXTOBJS:.$(SUFFIX)=.$(PSUFFIX))
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))
BLASOBJS = $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
BLASOBJS_P = $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)
BLASOBJS = $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
BLASOBJS_P = $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P)
ifdef EXPRECISION
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
@ -24,23 +22,19 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
endif
$(SBBLASOBJS) $(SBBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
$(SBEXTOBJS) $(SBEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
$(SBBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(SBEXTOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
libs :: $(BLASOBJS) $(COMMONOBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

View File

@ -1,21 +1,5 @@
# COMPILER_PREFIX = mingw32-
ifneq ($(DYNAMIC_ARCH),1)
ADD_CPUFLAGS = 1
else
ifdef TARGET_CORE
ADD_CPUFLAGS = 1
endif
endif
ifdef ADD_CPUFLAGS
ifdef HAVE_SSE
CCOMMON_OPT += -msse
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -msse
endif
endif
endif
ifeq ($(OSNAME), Interix)
ARFLAGS = -m x86
@ -70,20 +54,3 @@ LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm
else
LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm
endif
ifdef HAVE_SSE2
CCOMMON_OPT += -msse2
FCOMMON_OPT += -msse2
endif
ifdef HAVE_SSE3
CCOMMON_OPT += -msse3
FCOMMON_OPT += -msse3
ifdef HAVE_SSSE3
CCOMMON_OPT += -mssse3
FCOMMON_OPT += -mssse3
endif
ifdef HAVE_SSE4_1
CCOMMON_OPT += -msse4.1
FCOMMON_OPT += -msse4.1
endif
endif

View File

@ -8,214 +8,36 @@ endif
endif
endif
ifeq ($(C_COMPILER), CLANG)
ifeq ($(findstring icx,$(CC)),icx)
CCOMMON_OPT += -fp-model=consistent
endif
endif
ifneq ($(DYNAMIC_ARCH),1)
ADD_CPUFLAGS = 1
else
ifdef TARGET_CORE
ADD_CPUFLAGS = 1
endif
endif
ifdef ADD_CPUFLAGS
ifdef HAVE_SSE3
CCOMMON_OPT += -msse3
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -msse3
endif
endif
ifdef HAVE_SSSE3
CCOMMON_OPT += -mssse3
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -mssse3
endif
endif
ifdef HAVE_SSE4_1
CCOMMON_OPT += -msse4.1
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -msse4.1
endif
endif
ifndef OLDGCC
ifdef HAVE_AVX
CCOMMON_OPT += -mavx
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -mavx
endif
endif
endif
ifndef NO_AVX2
ifdef HAVE_AVX2
CCOMMON_OPT += -mavx2
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -mavx2
endif
endif
endif
ifeq ($(CORE), SKYLAKEX)
ifndef DYNAMIC_ARCH
ifndef NO_AVX512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
ifeq ($(CORE), COOPERLAKE)
ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC)
# cooperlake support was added in 10.1
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
CCOMMON_OPT += -march=cooperlake
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=cooperlake
endif
else # gcc not support, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
else ifeq ($(C_COMPILER), CLANG)
# cooperlake support was added in clang 9
ifeq ($(CLANGVERSIONGTEQ9), 1)
CCOMMON_OPT += -march=cooperlake
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=cooperlake
endif
else # not supported in clang, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
endif
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
ifeq ($(CORE), SAPPHIRERAPIDS)
ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC)
# sapphire rapids support was added in 11
ifeq ($(GCCVERSIONGTEQ11), 1)
CCOMMON_OPT += -march=sapphirerapids
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=sapphirerapids
endif
else # gcc not support, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
else ifeq ($(C_COMPILER), CLANG)
# sapphire rapids support was added in clang 12
ifeq ($(CLANGVERSIONGTEQ12), 1)
CCOMMON_OPT += -march=sapphirerapids
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=sapphirerapids
endif
else # not supported in clang, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
endif
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
ifeq ($(CORE), ZEN)
ifdef HAVE_AVX512VL
ifndef NO_AVX512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
endif
ifdef HAVE_AVX2
ifeq ($(CORE), HASWELL)
ifndef DYNAMIC_ARCH
ifndef NO_AVX2
ifeq ($(C_COMPILER), GCC)
# AVX2 support was added in 4.7.0
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
CCOMMON_OPT += -mavx2
endif
else
ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -mavx2
endif
endif
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
ifeq ($(F_COMPILER), GFORTRAN)
# AVX2 support was added in 4.7.0
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
FCOMMON_OPT += -mavx2
endif
else
ifeq ($(F_COMPILER), FLANG)
FCOMMON_OPT += -mavx2
endif
endif
endif
endif
endif
endif
ifeq ($(OSNAME), Interix)

View File

@ -5,12 +5,6 @@ FCOMMON_OPT += -march=z13 -mzvector
endif
ifeq ($(CORE), Z14)
CCOMMON_OPT += -march=z14 -mzvector -O3
CCOMMON_OPT += -march=z14 -mzvector
FCOMMON_OPT += -march=z14 -mzvector
endif
# Enable floating-point expression contraction for clang, since it is the
# default for gcc
ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -ffp-contract=on
endif

168
README.md
View File

@ -2,24 +2,17 @@
[![Join the chat at https://gitter.im/xianyi/OpenBLAS](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
Cirrus CI: [![Build Status](https://api.cirrus-ci.com/github/xianyi/OpenBLAS.svg?branch=develop)](https://cirrus-ci.com/github/xianyi/OpenBLAS)
Travis CI: [![Build Status](https://travis-ci.org/xianyi/OpenBLAS.svg?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS)
AppVeyor: [![Build status](https://ci.appveyor.com/api/projects/status/09sohd35n8nkkx64/branch/develop?svg=true)](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
[![Build Status](https://dev.azure.com/xianyi/OpenBLAS/_apis/build/status/xianyi.OpenBLAS?branchName=develop)](https://dev.azure.com/xianyi/OpenBLAS/_build/latest?definitionId=1&branchName=develop)
OSUOSL POWERCI [![Build Status](https://powerci.osuosl.org/buildStatus/icon?job=OpenBLAS_gh%2Fdevelop)](http://powerci.osuosl.org/job/OpenBLAS_gh/job/develop/)
OSUOSL IBMZ-CI [![Build Status](http://ibmz-ci.osuosl.org/buildStatus/icon?job=OpenBLAS-Z%2Fdevelop)](http://ibmz-ci.osuosl.org/job/OpenBLAS-Z/job/develop/)
## Introduction
OpenBLAS is an optimized BLAS (Basic Linear Algebra Subprograms) library based on GotoBLAS2 1.13 BSD version.
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
Please read the documentation in the OpenBLAS folder: <https://github.com/OpenMathLib/OpenBLAS/docs>.
For a general introduction to the BLAS routines, please refer to the extensive documentation of their reference implementation hosted at netlib:
<https://www.netlib.org/blas>. On that site you will likewise find documentation for the reference implementation of the higher-level library LAPACK - the **L**inear **A**lgebra **Pack**age that comes included with OpenBLAS. If you are looking for a general primer or refresher on Linear Algebra, the set of six
20-minute lecture videos by Prof. Gilbert Strang on either MIT OpenCourseWare <https://ocw.mit.edu/resources/res-18-010-a-2020-vision-of-linear-algebra-spring-2020/> or Youtube <https://www.youtube.com/playlist?list=PLUl4u3cNGP61iQEFiWLE21EJCxwmWvvek> may be helpful.
Please read the documentation on the OpenBLAS wiki pages: <https://github.com/xianyi/OpenBLAS/wiki>.
## Binary Packages
@ -27,13 +20,12 @@ We provide official binary packages for the following platform:
* Windows x86/x86_64
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/) or from the Releases section of the github project page, [https://github.com/OpenMathLib/OpenBLAS/releases](https://github.com/OpenMathLib/OpenBLAS/releases).
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/).
## Installation from Source
Download from project homepage, https://github.com/OpenMathLib/OpenBLAS/, or check out the code
using Git from https://github.com/OpenMathLib/OpenBLAS.git. (If you want the most up to date version, be
sure to use the develop branch - master is several years out of date due to a change of maintainership.)
Download from project homepage, https://xianyi.github.com/OpenBLAS/, or check out the code
using Git from https://github.com/xianyi/OpenBLAS.git.
Buildtime parameters can be chosen in Makefile.rule, see there for a short description of each option.
Most can also be given directly on the make or cmake command line.
@ -41,45 +33,33 @@ Most can also be given directly on the make or cmake command line.
Building OpenBLAS requires the following to be installed:
* GNU Make or CMake
* GNU Make
* A C compiler, e.g. GCC or Clang
* A Fortran compiler (optional, for LAPACK)
* IBM MASS (optional, see below)
### Normal compile
Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically.
To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`.
The full target list is in the file `TargetList.txt`, other build options are documented in Makefile.rule and
can either be set there (typically by removing the comment character from the respective line), or used on the
`make` command line.
Note that when you run `make install` after building, you need to repeat all command line options you provided to `make`
in the build step, as some settings like the supported maximum number of threads are automatically derived from the
build host by default, which might not be what you want.
For building with `cmake`, the usual conventions apply, i.e. create a build directory either underneath the toplevel
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path to the source tree and any
build options you plan to set.
The full target list is in the file `TargetList.txt`.
### Cross compile
Set `CC` and `FC` to point to the cross toolchains, and if you use `make`, also set `HOSTCC` to your host C compiler.
Set `CC` and `FC` to point to the cross toolchains, and set `HOSTCC` to your host C compiler.
The target must be specified explicitly when cross compiling.
Examples:
* On a Linux system, cross-compiling to an older MIPS64 router board:
* On an x86 box, compile this library for a loongson3a CPU:
```sh
make BINARY=64 CC=mipsisa64r6el-linux-gnuabi64-gcc FC=mipsisa64r6el-linux-gnuabi64-gfortran HOSTCC=gcc TARGET=P6600
```
* or to a Windows x64 host:
```sh
make CC="i686-w64-mingw32-gcc -Bstatic" FC="i686-w64-mingw32-gfortran -static-libgfortran" TARGET=HASWELL BINARY=32 CROSS=1 NUM_THREADS=20 CONSISTENT_FPCSR=1 HOSTCC=gcc
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
```
You can find instructions for other cases both in the "Supported Systems" section below and in the docs folder. The .yml scripts included with the sources (which contain the
build scripts for the "continuous integration" (CI) build tests automatically run on every proposed change to the sources) may also provide additional hints.
When compiling for a more modern CPU TARGET of the same architecture, e.g. TARGET=SKYLAKEX on a HASWELL host, option "CROSS=1" can be used to suppress the automatic invocation of the tests at the end of the build.
* On an x86 box, compile this library for a loongson3a CPU with loongcc (based on Open64) compiler:
```sh
make CC=loongcc FC=loongf95 HOSTCC=gcc TARGET=LOONGSON3A CROSS=1 CROSS_SUFFIX=mips64el-st-linux-gnu- NO_LAPACKE=1 NO_SHARED=1 BINARY=32
```
### Debug version
@ -118,7 +98,7 @@ Use `PREFIX=` when invoking `make`, for example
```sh
make install PREFIX=your_installation_directory
```
(along with all options you added on the `make` command line in the preceding build step)
The default installation directory is `/opt/OpenBLAS`.
## Supported CPUs and Operating Systems
@ -133,17 +113,11 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
- **Intel Sandy Bridge**: Optimized Level-3 and Level-2 BLAS with AVX on x86-64.
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64.
- **Intel Skylake-X**: Optimized Level-3 and Level-2 BLAS with AVX512 and FMA on x86-64.
- **Intel Cooper Lake**: as Skylake-X with improved BFLOAT16 support.
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
- **AMD ZEN**: Uses Haswell codes with some optimizations for Zen 2/3 (use SkylakeX for Zen4)
#### MIPS32
- **MIPS 1004K**: uses P5600 codes
- **MIPS 24K**: uses P5600 codes
- **AMD ZEN**: Uses Haswell codes with some optimizations.
#### MIPS64
@ -159,105 +133,34 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
- **ARMv8**: Basic ARMV8 with small caches, optimized Level-3 and Level-2 BLAS
- **Cortex-A53**: same as ARMV8 (different cpu specifications)
- **Cortex-A55**: same as ARMV8 (different cpu specifications)
- **Cortex A57**: Optimized Level-3 and Level-2 functions
- **Cortex A72**: same as A57 (different cpu specifications)
- **Cortex A73**: same as A57 (different cpu specifications)
- **Cortex A76**: same as A57 (different cpu specifications)
- **Falkor**: same as A57 (different cpu specifications)
- **ThunderX**: Optimized some Level-1 functions
- **ThunderX2T99**: Optimized Level-3 BLAS and parts of Levels 1 and 2
- **ThunderX3T110**
- **TSV110**: Optimized some Level-3 helper functions
- **EMAG 8180**: preliminary support based on A57
- **Neoverse N1**: (AWS Graviton2) preliminary support
- **Neoverse V1**: (AWS Graviton3) optimized Level-3 BLAS
- **Apple Vortex**: preliminary support based on ThunderX2/3
- **A64FX**: preliminary support, optimized Level-3 BLAS
- **ARMV8SVE**: any ARMV8 cpu with SVE extensions
#### PPC/PPC64
- **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1`
- **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only.
- **POWER10**: Optimized Level-3 BLAS including SBGEMM and some Level-1,2.
- **AIX**: Dynamic architecture with OpenXL and OpenMP.
```sh
make CC=ibm-clang_r FC=xlf_r TARGET=POWER7 BINARY=64 USE_OPENMP=1 INTERFACE64=1 DYNAMIC_ARCH=1 USE_THREAD=1
```
#### IBM zEnterprise System
- **Z13**: Optimized Level-3 BLAS and Level-1,2
- **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2
#### RISC-V
- **C910V**: Optimized Level-3 BLAS (real) and Level-1,2 by RISC-V Vector extension 0.7.1.
```sh
make HOSTCC=gcc TARGET=C910V CC=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran
```
(also known to work on C906 as long as you use only single-precision functions - its instruction set support appears to be incomplete in double precision)
- **x280**: Level-3 BLAS and Level-1,2 are optimized by RISC-V Vector extension 1.0.
```sh
make HOSTCC=gcc TARGET=x280 NUM_THREADS=8 CC=riscv64-unknown-linux-gnu-clang FC=riscv64-unknown-linux-gnu-gfortran
```
- **ZVL???B**: Level-3 BLAS and Level-1,2 including vectorised kernels targeting generic RISCV cores with vector support with registers of at least the corresponding width; ZVL128B and ZVL256B are available.
e.g.:
```sh
make TARGET=RISCV64_ZVL256B CFLAGS="-DTARGET=RISCV64_ZVL256B" \
BINARY=64 ARCH=riscv64 CC='clang -target riscv64-unknown-linux-gnu' \
AR=riscv64-unknown-linux-gnu-ar AS=riscv64-unknown-linux-gnu-gcc \
LD=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran \
HOSTCC=gcc HOSTFC=gfortran -j
```
#### LOONGARCH64
- **LA64_GENERIC**: Optimized Level-3, Level-2 and Level-1 BLAS with scalar instruction
```sh
make HOSTCC=gcc TARGET=LA64_GENERIC CC=loongarch64-unknown-linux-gnu-gcc FC=loongarch64-unknown-linux-gnu-gfortran USE_SIMPLE_THREADED_LEVEL3=1
```
The old-style TARGET=LOONGSONGENERIC is still supported
- **LA264**: Optimized Level-3, Level-2 and Level-1 BLAS with LSX instruction
```sh
make HOSTCC=gcc TARGET=LA264 CC=loongarch64-unknown-linux-gnu-gcc FC=loongarch64-unknown-linux-gnu-gfortran USE_SIMPLE_THREADED_LEVEL3=1
```
The old-style TARGET=LOONGSON2K1000 is still supported
- **LA464**: Optimized Level-3, Level-2 and Level-1 BLAS with LASX instruction
```sh
make HOSTCC=gcc TARGET=LA464 CC=loongarch64-unknown-linux-gnu-gcc FC=loongarch64-unknown-linux-gnu-gfortran USE_SIMPLE_THREADED_LEVEL3=1
```
The old-style TARGET=LOONGSON3R5 is still supported
### Support for multiple targets in a single library
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX, Cooper Lake, Sapphire Rapids. For cpu generations not included in this list, the corresponding older model is used. If you also specify `DYNAMIC_OLDER=1`, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option `DYNAMIC_LIST` that allows to specify an individual list of targets to include instead of the default.
`DYNAMIC_ARCH` is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias,
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying DYNAMIC_ARCH=1 in Makefile.rule, on the gmake command line or as -DDYNAMIC_ARCH=TRUE in cmake.
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX. For cpu generations not included in this list, the corresponding older model is used. If you also specify DYNAMIC_OLDER=1, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option DYNAMIC_LIST that allows to specify an individual list of targets to include instead of the default.
DYNAMIC_ARCH is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias,
Core2, Penryn, Dunnington, Nehalem, Athlon, Opteron, Opteron_SSE3, Barcelona, Bobcat, Atom and Nano.
On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus. If compiler support for SVE is available at build time, support for NeoverseN2, NeoverseV1 as well as generic ArmV8SVE targets is also enabled.
For **POWER**, the list encompasses POWER6, POWER8 and POWER9. POWER10 is additionally available if a sufficiently recent compiler is used for the build.
On **ZARCH**, it comprises Z13 and Z14 as well as generic zarch support.
On **riscv64**, DYNAMIC_ARCH enables support for riscv64_zvl128b and riscv64_zvl256b in addition to generic riscv64 support. A compiler that supports RVV 1.0 is required to build OpenBLAS for riscv64 when DYNAMIC_ARCH is enabled.
On **LoongArch64**, it comprises LA264 and LA464 as well as generic LoongArch64 support.
The `TARGET` option can - and usually **should** - be used in conjunction with `DYNAMIC_ARCH=1` to specify which cpu model should be assumed for all the common code in the library, usually you will want to set this to the oldest model you expect to encounter.
Failure to specify this may lead to advanced instructions being used by the compiler, just because the build host happens to support them. This is most likely to happen when aggressive optimization options are in effect, and the resulting library may then crash with an
illegal instruction error on weaker hardware, before it even reaches the BLAS routines specifically included for that cpu.
On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus.
For **POWER**, the list encompasses POWER6, POWER8 and POWER9, on **ZARCH** it comprises Z13 and Z14.
The TARGET option can be used in conjunction with DYNAMIC_ARCH=1 to specify which cpu model should be assumed for all the
common code in the library, usually you will want to set this to the oldest model you expect to encounter.
Please note that it is not possible to combine support for different architectures, so no combined 32 and 64 bit or x86_64 and arm64 in the same library.
### Supported OS
@ -270,10 +173,9 @@ Please note that it is not possible to combine support for different architectur
- **NetBSD**: Supported by the community. We don't actively test the library on this OS.
- **DragonFly BSD**: Supported by the community. We don't actively test the library on this OS.
- **Android**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
- **AIX**: Supported on PPC up to POWER10
- **AIX**: Supported on PPC up to POWER8
- **Haiku**: Supported by the community. We don't actively test the library on this OS.
- **SunOS**: Supported by the community. We don't actively test the library on this OS.
- **Cortex-M**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-on-Cortex-M>.
- **SunOS**: Supported by the community. We don't actively test the library on this OS.
## Usage
@ -305,27 +207,25 @@ We provide the following functions to control the number of threads at runtime:
void goto_set_num_threads(int num_threads);
void openblas_set_num_threads(int num_threads);
```
Note that these are only used once at library initialization, and are not available for
fine-tuning thread numbers in individual BLAS calls.
If you compile this library with `USE_OPENMP=1`, you should use the above functions too.
## Reporting bugs
Please submit an issue in https://github.com/OpenMathLib/OpenBLAS/issues.
Please submit an issue in https://github.com/xianyi/OpenBLAS/issues.
## Contact
+ Use github discussions: https://github.com/OpenMathLib/OpenBLAS/discussions
* OpenBLAS users mailing list: https://groups.google.com/forum/#!forum/openblas-users
* OpenBLAS developers mailing list: https://groups.google.com/forum/#!forum/openblas-dev
## Change log
Please see Changelog.txt.
Please see Changelog.txt to view the differences between OpenBLAS and GotoBLAS2 1.13 BSD version.
## Troubleshooting
* Please read the [FAQ](https://github.com/OpenMathLib/OpenBLAS/docs/faq.md) in the docs folder first.
* Please read the [FAQ](https://github.com/xianyi/OpenBLAS/wiki/Faq) first.
* Please use GCC version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD.
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture.
Clang 3.0 will generate the wrong AVX binary code.
@ -342,9 +242,9 @@ Please see Changelog.txt.
## Contributing
1. [Check for open issues](https://github.com/OpenMathLib/OpenBLAS/issues) or open a fresh issue
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue
to start a discussion around a feature idea or a bug.
2. Fork the [OpenBLAS](https://github.com/OpenMathLib/OpenBLAS) repository to start making your changes.
2. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
3. Write a test which shows that the bug was fixed or that the feature works as expected.
4. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.

View File

@ -1,20 +0,0 @@
# Security Policy
## Supported Versions
It is generally recommended to use the latest release as this project
does not maintain multiple stable branches and providing packages e.g.
for Linux distributions is outside our scope. In particular, versions
before 0.3.18 can be assumed to carry the out-of-bounds-read error in
the LAPACK ?LARRV family of functions that was the subject of
CVE-2021-4048.
## Reporting a Vulnerability
If you suspect that you have found a vulnerability - a defect that could
be abused to compromise the security of a user's code or systems - please
do not use the normal github issue tracker (except perhaps to post a general
warning if you deem that necessary). Instead, please contact the project
maintainers through the email addresses given in their github user profiles.
Defects found in the "lapack-netlib" subtree should ideally be reported to
the maintainers of the reference implementation of LAPACK, lapack@icl.utk.edu

View File

@ -22,8 +22,6 @@ SANDYBRIDGE
HASWELL
SKYLAKEX
ATOM
COOPERLAKE
SAPPHIRERAPIDS
b)AMD CPU:
ATHLON
@ -51,7 +49,6 @@ POWER6
POWER7
POWER8
POWER9
POWER10
PPCG4
PPC970
PPC970MP
@ -61,11 +58,9 @@ CELL
3.MIPS CPU:
P5600
MIPS1004K
MIPS24K
1004K
4.MIPS64 CPU:
MIPS64_GENERIC
SICORTEX
LOONGSON3A
LOONGSON3B
@ -93,59 +88,12 @@ CORTEXA53
CORTEXA57
CORTEXA72
CORTEXA73
CORTEXA76
CORTEXA510
CORTEXA710
CORTEXX1
CORTEXX2
NEOVERSEN1
NEOVERSEV1
NEOVERSEN2
CORTEXA55
EMAG8180
FALKOR
THUNDERX
THUNDERX2T99
TSV110
THUNDERX3T110
VORTEX
A64FX
ARMV8SVE
FT2000
9.System Z:
ZARCH_GENERIC
Z13
Z14
10.RISC-V 64:
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
RISCV64_ZVL128B
C910V
x280
RISCV64_ZVL256B
11.LOONGARCH64:
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 are legacy names,
// and it is recommended to use the more standardized naming conventions
// LA64_GENERIC/LA264/LA464. You can still specify TARGET as
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 during compilation or runtime,
// and they will be internally relocated to LA64_GENERIC/LA264/LA464.
LOONGSONGENERIC
LOONGSON2K1000
LOONGSON3R5
LA64_GENERIC
LA264
LA464
12. Elbrus E2000:
E2K
13. Alpha
EV4
EV5
EV6
14.CSKY
CSKY
CK860FV

View File

@ -29,15 +29,15 @@ environment:
global:
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
matrix:
# - COMPILER: clang-cl
# WITH_FORTRAN: ON
# - COMPILER: clang-cl
# DYNAMIC_ARCH: ON
# WITH_FORTRAN: OFF
# - COMPILER: cl
# - COMPILER: MinGW64-gcc-7.2.0-mingw
# DYNAMIC_ARCH: OFF
# WITH_FORTRAN: ignore
- COMPILER: clang-cl
WITH_FORTRAN: yes
- COMPILER: clang-cl
DYNAMIC_ARCH: ON
WITH_FORTRAN: no
- COMPILER: cl
- COMPILER: MinGW64-gcc-7.2.0-mingw
DYNAMIC_ARCH: OFF
WITH_FORTRAN: ignore
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
COMPILER: MinGW-gcc-6.3.0-32
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
@ -46,10 +46,13 @@ environment:
install:
- if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
- if [%COMPILER%]==[clang-cl] conda update --yes -n base conda
- if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
- if [%COMPILER%]==[clang-cl] conda config --set auto_update_conda false
- if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake ninja flang=11.0.1
- if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake
- if [%WITH_FORTRAN%]==[no] conda install --yes --quiet ninja
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet -c isuruf kitware-ninja
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flang
- if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
- if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
- if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
@ -65,14 +68,15 @@ before_build:
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 ..
- if [%COMPILER%]==[MinGW-gcc-6.3.0-32] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
- if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
- if [%WITH_FORTRAN%]==[OFF] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_MT=mt -DMSVC_STATIC_CRT=ON ..
- if [%WITH_FORTRAN%]==[ON] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DCMAKE_MT=mt -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
- if [%USE_OPENMP%]==[ON] cmake -DUSE_OPENMP=ON ..
- if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON ..
- if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' ..
build_script:
- cmake --build .
test_script:
- ctest -j2
- echo Running Test
- cd utest
- openblas_utest

View File

@ -4,22 +4,14 @@ trigger:
branches:
include:
- develop
resources:
containers:
- container: oneapi-hpckit
image: intel/oneapi-hpckit:latest
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
- container: oneapi-basekit
image: intel/oneapi-basekit:latest
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
jobs:
# manylinux1 is useful to test because the
# standard Docker container uses an old version
# of gcc / glibc
- job: manylinux1_gcc
pool:
vmImage: 'ubuntu-latest'
vmImage: 'ubuntu-16.04'
steps:
- script: |
echo "FROM quay.io/pypa/manylinux1_x86_64
@ -35,7 +27,7 @@ jobs:
displayName: Run manylinux1 docker build
- job: Intel_SDE_skx
pool:
vmImage: 'ubuntu-latest'
vmImage: 'ubuntu-16.04'
steps:
- script: |
# at the time of writing the available Azure Ubuntu vm image
@ -57,248 +49,3 @@ jobs:
# we need a privileged docker run for sde process attachment
docker run --privileged intel_sde
displayName: 'Run AVX512 SkylakeX docker build / test'
# Windows_cl: configure in the 'build' directory with the "Visual Studio 17 2022"
# CMake generator, build the Release configuration, then run openblas_utest.exe
# from build/utest.
- job: Windows_cl
pool:
vmImage: 'windows-latest'
steps:
- task: CMake@1
inputs:
workingDirectory: 'build' # Optional
cmakeArgs: '-G "Visual Studio 17 2022" ..'
- task: CMake@1
inputs:
cmakeArgs: '--build . --config Release'
workingDirectory: 'build'
- script: |
cd build
cd utest
dir
openblas_utest.exe
# Windows_mingw_gmake: mingw32-make build with gcc/gfortran and DYNAMIC_ARCH=1,
# with the dynamic target list restricted to SANDYBRIDGE via DYNAMIC_LIST.
- job: Windows_mingw_gmake
pool:
vmImage: 'windows-latest'
steps:
- script: |
mingw32-make CC=gcc FC=gfortran DYNAMIC_ARCH=1 DYNAMIC_LIST="SANDYBRIDGE"
# Windows_clang_cmake: C-only (NOFORTRAN=1) Release build with clang-cl and
# Ninja. Ninja comes from conda-forge; the MSVC environment is set up through
# the Visual Studio 2019 vcvars64.bat before configuring. Ends with ctest.
- job: Windows_clang_cmake
  pool:
    vmImage: 'windows-latest'
  steps:
  # The three `set "VAR=..."` lines put the Miniconda tools, libraries and
  # headers in front of the existing PATH / LIB / CPATH values.
  - script: |
      set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
      set "LIB=C:\Miniconda\Library\lib;%LIB%"
      set "CPATH=C:\Miniconda\Library\include;%CPATH%"
      conda config --add channels conda-forge --force
      conda config --set auto_update_conda false
      conda install --yes ninja
      call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
      mkdir build
      cd build
      cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DNOFORTRAN=1 -DMSVC_STATIC_CRT=ON ..
      cmake --build . --config Release
      ctest
# Windows_flang_clang: Release build with clang-cl for C/C++ and conda-forge
# flang for Fortran, using Ninja and the Visual Studio 2022 vcvars64.bat
# environment. CMake is configured with BUILD_TESTING=OFF; ctest is still
# invoked at the end.
- job: Windows_flang_clang
pool:
vmImage: 'windows-2022'
steps:
- script: |
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
set "LIB=C:\Miniconda\Library\lib;%LIB%"
set "CPATH=C:\Miniconda\Library\include;%CPATH%"
conda config --add channels conda-forge --force
conda config --set auto_update_conda false
conda install --yes --quiet ninja flang
mkdir build
cd build
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER="flang -I C:\Miniconda\Library\include\flang" -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
cmake --build . --config Release
ctest
# Windows_cl_flang: Release build with MSVC cl for C and flang-new for Fortran
# (C_LAPACK=1), using Ninja and the Visual Studio 2022 vcvars64.bat
# environment. ctest runs once, then a second time with --rerun-failed
# --output-on-failure to print the output of any failed tests.
- job: Windows_cl_flang
pool:
vmImage: 'windows-2022'
steps:
- script: |
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
set "LIB=C:\Miniconda\Library\lib;%LIB%"
set "CPATH=C:\Miniconda\Library\include;%CPATH%"
conda config --add channels conda-forge --force
conda config --set auto_update_conda false
conda install --yes --quiet ninja flang
mkdir build
cd build
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
cmake -G "Ninja" -DCMAKE_C_COMPILER=cl -DCMAKE_Fortran_COMPILER=flang-new -DC_LAPACK=1 -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
cmake --build . --config Release
ctest
ctest --rerun-failed --output-on-failure
# OSX_OpenMP: gcc-13/gfortran-13 make build with TARGET=CORE2, DYNAMIC_ARCH,
# OpenMP and the 64-bit integer interface. The install step repeats the same
# options with PREFIX=../blasinst, and the installed tree is then listed.
- job: OSX_OpenMP
pool:
vmImage: 'macOS-12'
steps:
- script: |
brew update
make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-13 FC=gfortran-13
make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-13 FC=gfortran-13 PREFIX=../blasinst install
ls -lR ../blasinst
# OSX_GCC_Nothreads: single-threaded make build with gcc-13/gfortran-13.
# The option that disables threading is spelled USE_THREAD (singular); a
# USE_THREADS=0 argument is not the documented option name, so the job would
# silently build the default threaded library.
- job: OSX_GCC_Nothreads
  pool:
    vmImage: 'macOS-12'
  steps:
  - script: |
      brew update
      make USE_THREAD=0 CC=gcc-13 FC=gfortran-13
# OSX_GCC12: default make build on the macOS-latest image using gcc-12 and
# gfortran-12.
- job: OSX_GCC12
pool:
vmImage: 'macOS-latest'
steps:
- script: |
brew update
make CC=gcc-12 FC=gfortran-12
# OSX_OpenMP_Clang: C-only (NOFORTRAN=1) make build with Homebrew LLVM clang
# and libomp, TARGET=CORE2, OpenMP and DYNAMIC_ARCH. LD_LIBRARY_PATH and
# LIBRARY_PATH are set to the Homebrew LLVM lib directory.
- job: OSX_OpenMP_Clang
pool:
vmImage: 'macOS-latest'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib
steps:
- script: |
brew update
brew install llvm libomp
make TARGET=CORE2 USE_OPENMP=1 DYNAMIC_ARCH=1 CC=/usr/local/opt/llvm/bin/clang NOFORTRAN=1
# OSX_OpenMP_Clang_cmake: CMake counterpart of the Homebrew-clang OpenMP build:
# TARGET=CORE2, OpenMP, INTERFACE64, DYNAMIC_ARCH, no Fortran and NO_AVX512=1,
# followed by make and ctest in the build directory.
- job: OSX_OpenMP_Clang_cmake
pool:
vmImage: 'macOS-latest'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib
steps:
- script: |
brew update
brew install llvm libomp
mkdir build
cd build
cmake -DTARGET=CORE2 -DUSE_OPENMP=1 -DINTERFACE64=1 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DNOFORTRAN=1 -DNO_AVX512=1 ..
make
ctest
# OSX_dynarch_cmake: shared-library CMake build with gcc-13/gfortran-13,
# TARGET=CORE2 and DYNAMIC_ARCH limited to NEHALEM, HASWELL and SKYLAKEX via
# DYNAMIC_LIST, followed by ctest.
- job: OSX_dynarch_cmake
pool:
vmImage: 'macOS-12'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib
steps:
- script: |
mkdir build
cd build
cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DDYNAMIC_LIST='NEHALEM HASWELL SKYLAKEX' -DCMAKE_C_COMPILER=gcc-13 -DCMAKE_Fortran_COMPILER=gfortran-13 -DBUILD_SHARED_LIBS=ON ..
cmake --build .
ctest
# OSX_Ifort_Clang: make build with Homebrew LLVM clang for C and Intel ifort
# for Fortran. The ifort component of the oneAPI HPC kit is installed under
# /opt/intel/oneapi and cached, keyed on the download URL and component list,
# so the installer step only runs on a cache miss.
- job: OSX_Ifort_Clang
  pool:
    vmImage: 'macOS-latest'
  variables:
    LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
    MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/edb4dc2f-266f-47f2-8d56-21bc7764e119/m_HPCKit_p_2023.2.0.49443.dmg
    LIBRARY_PATH: /usr/local/opt/llvm/lib
    MACOS_FORTRAN_COMPONENTS: intel.oneapi.mac.ifort-compiler
  steps:
  # Create /opt/intel owned by the build user before the Cache task runs.
  - script: |
      brew update
      brew install llvm libomp
      sudo mkdir -p /opt/intel
      sudo chown $USER /opt/intel
    displayName: prepare for cache restore
  - task: Cache@2
    inputs:
      path: /opt/intel/oneapi
      key: '"install" | "$(MACOS_HPCKIT_URL)" | "$(MACOS_FORTRAN_COMPONENTS)"'
      cacheHitVar: CACHE_RESTORED
  # Download and mount the installer image, run the bootstrapper silently, then
  # detach the image and propagate the installer's exit code. The detach path
  # is derived from the same pipeline variable as the attach path; no shell
  # variable named URL is defined in this job.
  - script: |
      curl --output webimage.dmg --url $(MACOS_HPCKIT_URL) --retry 5 --retry-delay 5
      hdiutil attach webimage.dmg
      sudo /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)"/bootstrapper.app/Contents/MacOS/bootstrapper -s --action install --components="$(MACOS_FORTRAN_COMPONENTS)" --eula=accept --continue-with-optional-error=yes --log-dir=.
      installer_exit_code=$?
      hdiutil detach /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)" -quiet
      exit $installer_exit_code
    displayName: install
    condition: ne(variables.CACHE_RESTORED, 'true')
  - script: |
      source /opt/intel/oneapi/setvars.sh
      make CC=/usr/local/opt/llvm/bin/clang FC=ifort
# OSX_NDK_ARMV7: cross build for TARGET=ARMV7 with the Android NDK clang
# (armv7a-linux-androideabi21) installed via Homebrew cask; CBLAS only,
# ARM_SOFTFP_ABI=1, host compiler gcc.
- job: OSX_NDK_ARMV7
pool:
vmImage: 'macOS-12'
steps:
- script: |
brew update
brew install --cask android-ndk
export ANDROID_NDK_HOME=/usr/local/share/android-ndk
make TARGET=ARMV7 ONLY_CBLAS=1 CC=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi21-clang AR=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/darwin-x86_64/bin/llvm-ar HOSTCC=gcc ARM_SOFTFP_ABI=1 -j4
# OSX_IOS_ARMV8: cross build for TARGET=ARMV8 with DYNAMIC_ARCH and no Fortran,
# using the Xcode 14.2 clang; CFLAGS point at the iPhoneOS16.2 SDK with
# -arch arm64 and a minimum iOS version of 10.0.
- job: OSX_IOS_ARMV8
pool:
vmImage: 'macOS-12'
variables:
CC: /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_14.2.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS16.2.sdk -arch arm64 -miphoneos-version-min=10.0
steps:
- script: |
make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
# OSX_IOS_ARMV7: cross build for TARGET=ARMV7 with DYNAMIC_ARCH and no Fortran,
# using the Xcode 14.2 clang; CFLAGS point at the iPhoneOS16.2 SDK with
# -arch armv7, -mno-thumb and a minimum iOS version of 5.1.
- job: OSX_IOS_ARMV7
pool:
vmImage: 'macOS-12'
variables:
CC: /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
CFLAGS: -O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode_14.2.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS16.2.sdk -arch armv7 -miphoneos-version-min=5.1
steps:
- script: |
make TARGET=ARMV7 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
# OSX_xbuild_DYNAMIC_ARM64: cross build for macOS arm64 (TARGET=ARMV8,
# DYNAMIC_ARCH, no Fortran) with the Xcode 14.2 clang and the MacOSX13.1 SDK.
# Before building, the script lists the available SDKs and prints the
# compiler's supported CPUs and version for the build log.
- job: OSX_xbuild_DYNAMIC_ARM64
  pool:
    vmImage: 'macOS-12'
  variables:
    CC: /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
    CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_14.2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX13.1.sdk -arch arm64
  steps:
  # All diagnostic commands use the same Xcode 14.2 toolchain that CC points
  # to, so the printed CPU list describes the compiler that performs the build.
  - script: |
      ls /Applications/Xcode_14.2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs
      /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -arch arm64 --print-supported-cpus
      /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang --version
      make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
# ALPINE_MUSL: build inside an Alpine chroot on the Ubuntu image. The script
# downloads alpine-chroot-install v0.14.0 and aborts unless its SHA-1 matches,
# installs the chroot with build-base, gfortran, perl, linux-headers and sudo,
# then runs a DYNAMIC_ARCH=1 BINARY=64 build and installs it into mytestdir.
# Finally it writes a small C file that includes openblas_config.h and uses
# cpu_set_t, and compiles it against the installed headers and library.
- job: ALPINE_MUSL
pool:
vmImage: 'ubuntu-latest'
steps:
- script: |
wget https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.14.0/alpine-chroot-install \
&& echo 'ccbf65f85cdc351851f8ad025bb3e65bae4d5b06 alpine-chroot-install' | sha1sum -c \
|| exit 1
alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers sudo'
alpine make DYNAMIC_ARCH=1 BINARY=64
alpine make DYNAMIC_ARCH=1 BINARY=64 PREFIX=mytestdir install
alpine ls -l mytestdir/include
alpine echo "// tests that inclusion of openblas_config.h works with musl" >test_install.c
alpine echo "#include <openblas_config.h>" >>test_install.c
alpine echo "int main(){" >> test_install.c
alpine echo "cpu_set_t* cpu_set = NULL;}" >>test_install.c
alpine gcc -Imytestdir/include test_install.c -Lmytestdir/lib -lopenblas -lpthread -lgfortran -o test_install

File diff suppressed because it is too large Load Diff

View File

@ -1,133 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef AMAX
#ifdef COMPLEX
#ifdef DOUBLE
#define AMAX BLASFUNC(dzamax)
#else
#define AMAX BLASFUNC(scamax)
#endif
#else
#ifdef DOUBLE
#define AMAX BLASFUNC(damax)
#else
#define AMAX BLASFUNC(samax)
#endif
#endif
/*
 * Benchmark driver for AMAX (the concrete routine is chosen by the
 * COMPLEX / DOUBLE macros when AMAX is defined).
 *
 * Usage: <program> [from [to [step]]]
 *   Vector lengths from `from` up to `to` are timed, advancing by `step`
 *   (defaults: 1, 200, 1). `to` is raised to `from` if it is smaller.
 * Environment:
 *   OPENBLAS_LOOPS  timed repetitions per length (default 1)
 *   OPENBLAS_INCX   stride handed to AMAX (default 1)
 *
 * All output goes to stderr. Returns 0 on success; exits with status 1 when
 * the buffer cannot be allocated or when step / OPENBLAS_LOOPS is not
 * positive (a non-positive step would never terminate the size loop and a
 * non-positive loop count would divide the accumulated time by zero or a
 * negative number).
 */
int main(int argc, char *argv[])
{
  FLOAT *x;
  blasint m, i;
  blasint inc_x = 1;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  double timeg;

  /* skip the program name */
  argc--;
  argv++;

  if (argc > 0)
  {
    from = atol(*argv);
    argc--;
    argv++;
  }
  if (argc > 0)
  {
    to = MAX(atol(*argv), from);
    argc--;
    argv++;
  }
  if (argc > 0)
  {
    step = atol(*argv);
    argc--;
    argv++;
  }

  if ((p = getenv("OPENBLAS_LOOPS")))
    loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX")))
    inc_x = atoi(p);

  if (step < 1 || loops < 1)
  {
    fprintf(stderr, "Step and OPENBLAS_LOOPS must be positive\n");
    exit(1);
  }

  /* blasint may be wider than int, so cast before handing it to %d */
  fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, (int)inc_x, loops);

  /* one buffer sized for the largest vector that will be timed */
  if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
  {
    fprintf(stderr, "Out of Memory!!\n");
    exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for (m = from; m <= to; m += step)
  {
    timeg = 0;
    fprintf(stderr, " %6d : ", (int)m);

    for (l = 0; l < loops; l++)
    {
      /* refill the input outside the timed region */
      for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
      {
        x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
      }

      begin();
      AMAX(&m, x, &inc_x);
      end();
      timeg += getsec();
    }

    /* average time per call */
    timeg /= loops;

    fprintf(stderr,
            " %10.2f MFlops %10.6f sec\n",
            COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
  }

  free(x);
  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,137 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef AMIN
#ifdef COMPLEX
#ifdef DOUBLE
#define AMIN BLASFUNC(dzamin)
#else
#define AMIN BLASFUNC(scamin)
#endif
#else
#ifdef DOUBLE
#define AMIN BLASFUNC(damin)
#else
#define AMIN BLASFUNC(samin)
#endif
#endif
/*
 * Benchmark driver for AMIN (the concrete routine is chosen by the
 * COMPLEX / DOUBLE macros when AMIN is defined).
 *
 * Usage: <program> [from [to [step]]]
 *   Vector lengths from `from` up to `to` are timed, advancing by `step`
 *   (defaults: 1, 200, 1). `to` is raised to `from` if it is smaller.
 * Environment:
 *   OPENBLAS_LOOPS  timed repetitions per length (default 1)
 *   OPENBLAS_INCX   stride handed to AMIN (default 1)
 *
 * All output goes to stderr. Returns 0 on success; exits with status 1 when
 * the buffer cannot be allocated or when step / OPENBLAS_LOOPS is not
 * positive (a non-positive step would never terminate the size loop and a
 * non-positive loop count would divide the accumulated time by zero or a
 * negative number).
 */
int main(int argc, char *argv[])
{
  FLOAT *x;
  blasint m, i;
  blasint inc_x = 1;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  double timeg;

  /* skip the program name */
  argc--;
  argv++;

  if (argc > 0)
  {
    from = atol(*argv);
    argc--;
    argv++;
  }
  if (argc > 0)
  {
    to = MAX(atol(*argv), from);
    argc--;
    argv++;
  }
  if (argc > 0)
  {
    step = atol(*argv);
    argc--;
    argv++;
  }

  if ((p = getenv("OPENBLAS_LOOPS")))
    loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX")))
    inc_x = atoi(p);

  if (step < 1 || loops < 1)
  {
    fprintf(stderr, "Step and OPENBLAS_LOOPS must be positive\n");
    exit(1);
  }

  /* blasint may be wider than int, so cast before handing it to %d */
  fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, (int)inc_x, loops);

  /* one buffer sized for the largest vector that will be timed */
  if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
  {
    fprintf(stderr, "Out of Memory!!\n");
    exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for (m = from; m <= to; m += step)
  {
    timeg = 0;
    fprintf(stderr, " %6d : ", (int)m);

    for (l = 0; l < loops; l++)
    {
      /* refill the input outside the timed region */
      for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
      {
        x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
      }

      begin();
      AMIN(&m, x, &inc_x);
      end();
      timeg += getsec();
    }

    /* average time per call */
    timeg /= loops;

    fprintf(stderr,
            " %10.2f MFlops %10.6f sec\n",
            COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
  }

  free(x);
  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -25,108 +25,169 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef ASUM
#ifdef COMPLEX
#ifdef DOUBLE
#define ASUM BLASFUNC(dzasum)
#define ASUM BLASFUNC(dzasum)
#else
#define ASUM BLASFUNC(scasum)
#define ASUM BLASFUNC(scasum)
#endif
#else
#ifdef DOUBLE
#define ASUM BLASFUNC(dasum)
#define ASUM BLASFUNC(dasum)
#else
#define ASUM BLASFUNC(sasum)
#define ASUM BLASFUNC(sasum)
#endif
#endif
int main(int argc, char *argv[])
{
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate a buffer from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * size: requested byte count; the segment size given to shmget() is
 *       (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1).
 * Returns the address at which the segment was attached. If shmget() or
 * shmat() fails, a message is printed and the process exits with status 1.
 * The segment is marked for removal (IPC_RMID) right after attaching.
 */
static void *huge_malloc(BLASLONG size){
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);
  if (-1 == (BLASLONG)address) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);
  return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
FLOAT result;
blasint m, i;
blasint inc_x = 1;
blasint inc_x=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
double time1, timeg;
int from = 1;
int to = 200;
int step = 1;
argc--;
argv++;
struct timeval start, stop;
double time1,timeg;
if (argc > 0)
{
from = atol(*argv);
argc--;
argv++;
}
if (argc > 0)
{
to = MAX(atol(*argv), from);
argc--;
argv++;
}
if (argc > 0)
{
step = atol(*argv);
argc--;
argv++;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if ((p = getenv("OPENBLAS_LOOPS")))
loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
{
fprintf(stderr, "Out of Memory!!\n");
exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for (m = from; m <= to; m += step)
for(m = from; m <= to; m += step)
{
timeg = 0;
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
fprintf(stderr, " %6d : ", (int)m);
for (l = 0; l < loops; l++)
{
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
{
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
}
begin();
result = ASUM(&m, x, &inc_x);
end();
timeg += getsec();
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
result = ASUM (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
if (loops > 1)
timeg /= loops;
timeg /= loops;
#ifdef COMPLEX
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
#else
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
#endif
}
return 0;

View File

@ -1,124 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef AXPBY
#ifdef COMPLEX
#ifdef DOUBLE
#define AXPBY BLASFUNC(zaxpby)
#else
#define AXPBY BLASFUNC(caxpby)
#endif
#else
#ifdef DOUBLE
#define AXPBY BLASFUNC(daxpby)
#else
#define AXPBY BLASFUNC(saxpby)
#endif
#endif
/*
 * Benchmark driver for AXPBY (the concrete routine is chosen by the
 * COMPLEX / DOUBLE macros when AXPBY is defined).
 *
 * Usage: <program> [from [to [step]]]
 *   Vector lengths from `from` up to `to` are timed, advancing by `step`
 *   (defaults: 1, 200, 1). `to` is raised to `from` if it is smaller.
 * Environment:
 *   OPENBLAS_LOOPS  timed repetitions per length (default 1)
 *   OPENBLAS_INCX   stride of x (default 1)
 *   OPENBLAS_INCY   stride of y (default 1)
 *
 * All output goes to stderr. Returns 0 on success; exits with status 1 when
 * a buffer cannot be allocated or when step / OPENBLAS_LOOPS is not positive
 * (a non-positive step would never terminate the size loop and a
 * non-positive loop count would divide the accumulated time by zero or a
 * negative number).
 */
int main(int argc, char *argv[]){

  FLOAT *x, *y;
  FLOAT alpha[2] = { 2.0, 2.0 };
  FLOAT beta[2] = {2.0, 2.0};
  blasint m, i;
  blasint inc_x=1,inc_y=1;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  double timeg;

  /* skip the program name */
  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
  if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);

  if (step < 1 || loops < 1) {
    fprintf(stderr, "Step and OPENBLAS_LOOPS must be positive\n");
    exit(1);
  }

  /* blasint may be wider than int, so cast before handing it to %d */
  fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,(int)inc_x,(int)inc_y,loops);

  /* buffers sized for the largest vectors that will be timed */
  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {
    timeg=0;

    fprintf(stderr, " %6d : ", (int)m);

    /* inputs are generated once per size, before the timed repetitions */
    for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
      x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
    }

    for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
      y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
    }

    for (l=0; l<loops; l++)
    {
      begin();
      AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y );
      end();
      timeg += getsec();
    }

    /* average time per call */
    timeg /= loops;

    fprintf(stderr,
            " %10.2f MFlops %10.6f sec\n",
            (COMPSIZE * COMPSIZE * 4. - COMPSIZE) * (double)m / timeg * 1.e-6, timeg);
  }

  free(y);
  free(x);
  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef AXPY
@ -43,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate a buffer from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * size: requested byte count; the segment size given to shmget() is
 *       (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1).
 * Returns the address at which the segment was attached. If shmget() or
 * shmat() fails, a message is printed and the process exits with status 1.
 * The segment is marked for removal (IPC_RMID) right after attaching.
 */
static void *huge_malloc(BLASLONG size){
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);
  if (-1 == (BLASLONG)address) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);
  return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x, *y;
@ -56,6 +127,8 @@ int main(int argc, char *argv[]){
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
@ -78,7 +151,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -102,13 +175,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();
gettimeofday( &start, (struct timezone *)0);
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
end();
gettimeofday( &stop, (struct timezone *)0);
time1 = getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
@ -117,7 +190,7 @@ int main(int argc, char *argv[]){
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.9f sec\n",
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
}

View File

@ -1,134 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#elif defined(__APPLE__)
#include <mach/mach_time.h>
#endif
#include "common.h"
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate a buffer from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * size: requested byte count; the segment size given to shmget() is
 *       (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1).
 * Returns the address at which the segment was attached. If shmget() or
 * shmat() fails, a message is printed and the process exits with status 1.
 * The segment is marked for removal (IPC_RMID) right after attaching.
 */
static void *huge_malloc(BLASLONG size){
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);
  if (-1 == (BLASLONG)address) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);
  return address;
}
#define malloc huge_malloc
#endif
/* Benchmarks should allocate with cacheline (often 64 bytes) alignment
   to avoid unreliable results. This technique, storing the allocated
   pointer value just before the aligned memory, doesn't require
   C11's aligned_alloc for compatibility with older compilers.

   n: number of usable bytes requested.
   Returns a pointer aligned to L1_DATA_LINESIZE (which must be a power of
   two for the mask below to work), or NULL if malloc() fails. The pointer
   returned by malloc() is stored in the slot immediately before the
   returned address so that it can be recovered when freeing. */
static void *aligned_alloc_cacheline(size_t n)
{
  /* Reserve room for the saved pointer plus the worst-case alignment
     padding (L1_DATA_LINESIZE - 1 bytes) in addition to the n payload
     bytes, so the aligned region always fits inside the allocation. */
  void *raw = malloc(sizeof(void *) + n + L1_DATA_LINESIZE - 1);
  if (raw) {
    /* First line-aligned address that leaves at least sizeof(void *)
       bytes between it and the start of the allocation. */
    void **aligned = (void **)
      (((uintptr_t)raw + sizeof(void *) + L1_DATA_LINESIZE - 1)
       & ~((uintptr_t)L1_DATA_LINESIZE - 1));
    aligned[-1] = raw; /* remembered for the matching free() */
    raw = aligned;
  }
  return raw;
}
#define malloc aligned_alloc_cacheline
#define free(p) free((p) ? ((void **)(p))[-1] : (p))
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
struct timeval start, stop;
#elif defined(__APPLE__)
mach_timebase_info_data_t info;
uint64_t start = 0, stop = 0;
#else
struct timespec start = { 0, 0 }, stop = { 0, 0 };
#endif
/*
 * Return the elapsed time in seconds between the timestamps stored in the
 * file-scope `start` and `stop` variables (written by begin() and end()).
 * The representation of those variables depends on the platform branch.
 */
double getsec()
{
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
  /* struct timeval: seconds + microseconds */
  return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
#elif defined(__APPLE__)
  /* start/stop come from clock_gettime_nsec_np(), which already reports
     nanoseconds; scaling by the mach timebase as well would distort the
     result wherever numer != denom. */
  return (double)(stop - start) * 1.e-9;
#else
  /* struct timespec: seconds + nanoseconds */
  return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
#endif
}
/*
 * Store the current time in the file-scope `start` variable, marking the
 * beginning of a timed region. The clock source depends on the platform.
 */
void begin() {
#if !defined(__WIN32__) && !defined(__WIN64__) && defined(_POSIX_TIMERS)
#  if defined(__APPLE__)
  start = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
#  else
  clock_gettime(CLOCK_REALTIME, &start);
#  endif
#else
  /* Windows, or no POSIX timers available */
  gettimeofday( &start, (struct timezone *)0);
#endif
}
/*
 * Store the current time in the file-scope `stop` variable, marking the end
 * of a timed region. The clock source depends on the platform.
 */
void end() {
#if !defined(__WIN32__) && !defined(__WIN64__) && defined(_POSIX_TIMERS)
#  if defined(__APPLE__)
  stop = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
#  else
  clock_gettime(CLOCK_REALTIME, &stop);
#  endif
#else
  /* Windows, or no POSIX timers available */
  gettimeofday( &stop, (struct timezone *)0);
#endif
}

View File

@ -36,7 +36,12 @@
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
double fabs(double);
@ -66,6 +71,41 @@ double fabs(double);
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
static __inline double getmflops(int ratio, int m, double secs){
double mm = (double)m;
@ -105,6 +145,7 @@ int main(int argc, char *argv[]){
FLOAT maxerr;
struct timeval start, stop;
double time1;
argc--;argv++;
@ -132,46 +173,46 @@ int main(int argc, char *argv[]){
#ifndef COMPLEX
if (uplos & 1) {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.;
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
for(i = 0; i < j; i++) a[i + j * m] = 0.;
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
} else {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.;
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
}
}
#else
if (uplos & 1) {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) {
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
a[(i + j * m) * 2 + 0] = 0.;
a[(i + j * m) * 2 + 1] = 0.;
}
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[(j + j * m) * 2 + 1] = 0.;
for(i = j + 1; i < m; i++) {
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
}
} else {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) {
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[(j + j * m) * 2 + 1] = 0.;
for(i = j + 1; i < m; i++) {
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
a[(i + j * m) * 2 + 0] = 0.;
a[(i + j * m) * 2 + 1] = 0.;
}
}
}
@ -179,31 +220,29 @@ int main(int argc, char *argv[]){
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
begin();
gettimeofday( &start, (struct timezone *)0);
POTRF(uplo[uplos], &m, b, &m, &info);
end();
gettimeofday( &stop, (struct timezone *)0);
if (info != 0) {
fprintf(stderr, "Info = %d\n", info);
exit(1);
}
time1 = getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
maxerr = 0.;
if (!(uplos & 1)) {
for (j = 0; j < m; j++) {
for(i = 0; i <= j; i++) {
#ifndef COMPLEX
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
#else
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
#endif
}
}
@ -211,13 +250,10 @@ int main(int argc, char *argv[]){
for (j = 0; j < m; j++) {
for(i = j; i < m; i++) {
#ifndef COMPLEX
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
#else
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
#endif
}
}

View File

@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef COPY
@ -43,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate a buffer from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * size: requested byte count; the segment size given to shmget() is
 *       (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1).
 * Returns the address at which the segment was attached. If shmget() or
 * shmat() fails, a message is printed and the process exits with status 1.
 * The segment is marked for removal (IPC_RMID) right after attaching.
 */
static void *huge_malloc(BLASLONG size){
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);
  if (-1 == (BLASLONG)address) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);
  return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x, *y;
@ -57,9 +128,8 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
double time1 = 0.0, timeg = 0.0;
long nanos = 0;
time_t seconds = 0;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
@ -81,7 +151,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -93,27 +163,35 @@ int main(int argc, char *argv[]){
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (l=0; l<loops; l++)
{
begin();
COPY (&m, x, &inc_x, y, &inc_y );
end();
timeg += getsec();
}
timeg /= loops;
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
fprintf(stderr,
" %10.2f MBytes %12.9f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg / 1.e6, timeg);
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
COPY (&m, x, &inc_x, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MBytes %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}

View File

@ -25,16 +25,89 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef DOT
#ifdef DOUBLE
#define DOT BLASFUNC(ddot)
#else
#define DOT BLASFUNC(sdot)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate a buffer from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * size: requested byte count; the segment size given to shmget() is
 *       (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1).
 * Returns the address at which the segment was attached. If shmget() or
 * shmat() fails, a message is printed and the process exits with status 1.
 * The segment is marked for removal (IPC_RMID) right after attaching.
 */
static void *huge_malloc(BLASLONG size){
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);
  if (-1 == (BLASLONG)address) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);
  return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x, *y;
@ -49,6 +122,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
@ -71,7 +145,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -95,12 +169,15 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();
gettimeofday( &start, (struct timezone *)0);
result = DOT (&m, x, &inc_x, y, &inc_y );
end();
timeg += getsec();
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}

View File

@ -36,7 +36,13 @@
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef GEEV
@ -68,6 +74,71 @@ extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a,
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info );
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Windows stand-in for gettimeofday(): stores the current system time in *tv
 * as seconds and microseconds relative to the Unix epoch.
 *
 * tv - destination; when NULL nothing is written.
 * tz - ignored; kept so callers that pass a timezone pointer still compile.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz; /* intentionally unused */

  if (NULL != tv)
  {
    GetSystemTimeAsFileTime(&ft);

    /* Join the high and low halves of the FILETIME into one 64-bit count. */
    tmpres   = ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres  |= ft.dwLowDateTime;

    /*converting file time to unix epoch*/
    tmpres /= 10;  /*convert into microseconds*/
    tmpres -= DELTA_EPOCH_IN_MICROSECS;

    /* Split the microsecond count into whole seconds and the remainder. */
    tv->tv_sec  = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Obtain `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB and return the attached address.
 * On any failure a message is printed to stdout and the process exits with
 * status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int   seg;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Requested length is size plus one HUGE_PAGESIZE, masked down to a
     multiple of HUGE_PAGESIZE (assumes HUGE_PAGESIZE is a power of two --
     TODO confirm against its definition). */
  seg = shmget(IPC_PRIVATE,
               (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
               SHM_HUGETLB | IPC_CREAT | 0600);
  if (seg < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(seg, NULL, SHM_RND);
  if ((BLASLONG)mem == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* IPC_RMID is issued right after attaching; the attached address is still
     handed back to the caller. */
  shmctl(seg, IPC_RMID, 0);

  return mem;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
@ -83,6 +154,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1;
argc--;argv++;
@ -123,7 +195,7 @@ int main(int argc, char *argv[]){
for(j = 0; j < to; j++){
for(i = 0; i < to * COMPSIZE; i++){
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -142,7 +214,7 @@ int main(int argc, char *argv[]){
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -151,7 +223,7 @@ int main(int argc, char *argv[]){
for(m = from; m <= to; m += step){
fprintf(stderr, " %6d : ", (int)m);
begin();
gettimeofday( &start, (struct timezone *)0);
lwork = -1;
#ifndef COMPLEX
@ -167,14 +239,14 @@ int main(int argc, char *argv[]){
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
#endif
end();
gettimeofday( &stop, (struct timezone *)0);
if (info) {
fprintf(stderr, "failed to compute eigenvalues .. %d\n", info);
exit(1);
}
time1 = getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
fprintf(stderr,
" %10.2f MFlops : %10.2f Sec : %d\n",

View File

@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef GEMM
@ -33,8 +39,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef DOUBLE
#define GEMM BLASFUNC(dgemm)
#elif defined(HALF)
#define GEMM BLASFUNC(sbgemm)
#else
#define GEMM BLASFUNC(sgemm)
#endif
@ -49,10 +53,74 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Windows stand-in for gettimeofday(): stores the current system time in *tv
 * as seconds and microseconds relative to the Unix epoch.
 *
 * tv - destination; when NULL nothing is written.
 * tz - ignored; kept so callers that pass a timezone pointer still compile.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz; /* intentionally unused */

  if (NULL != tv)
  {
    GetSystemTimeAsFileTime(&ft);

    /* Join the high and low halves of the FILETIME into one 64-bit count. */
    tmpres   = ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres  |= ft.dwLowDateTime;

    /*converting file time to unix epoch*/
    tmpres /= 10;  /*convert into microseconds*/
    tmpres -= DELTA_EPOCH_IN_MICROSECS;

    /* Split the microsecond count into whole seconds and the remainder. */
    tv->tv_sec  = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Obtain `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB and return the attached address.
 * On any failure a message is printed to stdout and the process exits with
 * status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int   seg;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Requested length is size plus one HUGE_PAGESIZE, masked down to a
     multiple of HUGE_PAGESIZE (assumes HUGE_PAGESIZE is a power of two --
     TODO confirm against its definition). */
  seg = shmget(IPC_PRIVATE,
               (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
               SHM_HUGETLB | IPC_CREAT | 0600);
  if (seg < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(seg, NULL, SHM_RND);
  if ((BLASLONG)mem == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* IPC_RMID is issued right after attaching; the attached address is still
     handed back to the caller. */
  shmctl(seg, IPC_RMID, 0);

  return mem;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
IFLOAT *a, *b;
FLOAT *c;
FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 0.0};
FLOAT beta [] = {0.0, 0.0};
char transa = 'N';
@ -68,6 +136,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1, timeg;
argc--;argv++;
@ -115,25 +184,25 @@ int main(int argc, char *argv[]){
k = to;
}
if (( a = (IFLOAT *)malloc(sizeof(IFLOAT) * m * k * COMPSIZE)) == NULL) {
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * m * k * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( b = (IFLOAT *)malloc(sizeof(IFLOAT) * k * n * COMPSIZE)) == NULL) {
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * k * n * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
for (i = 0; i < m * k * COMPSIZE; i++) {
a[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
a[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (i = 0; i < k * n * COMPSIZE; i++) {
b[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (i = 0; i < m * n * COMPSIZE; i++) {
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
@ -156,14 +225,14 @@ int main(int argc, char *argv[]){
ldc = m;
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
begin();
gettimeofday( &start, (struct timezone *)0);
for (j=0; j<loops; j++) {
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
}
end();
time1 = getsec();
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg = time1/loops;
fprintf(stderr,

View File

@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef GEMM
@ -47,6 +53,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Windows stand-in for gettimeofday(): stores the current system time in *tv
 * as seconds and microseconds relative to the Unix epoch.
 *
 * tv - destination; when NULL nothing is written.
 * tz - ignored; kept so callers that pass a timezone pointer still compile.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz; /* intentionally unused */

  if (NULL != tv)
  {
    GetSystemTimeAsFileTime(&ft);

    /* Join the high and low halves of the FILETIME into one 64-bit count. */
    tmpres   = ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres  |= ft.dwLowDateTime;

    /*converting file time to unix epoch*/
    tmpres /= 10;  /*convert into microseconds*/
    tmpres -= DELTA_EPOCH_IN_MICROSECS;

    /* Split the microsecond count into whole seconds and the remainder. */
    tv->tv_sec  = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Obtain `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB and return the attached address.
 * On any failure a message is printed to stdout and the process exits with
 * status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int   seg;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Requested length is size plus one HUGE_PAGESIZE, masked down to a
     multiple of HUGE_PAGESIZE (assumes HUGE_PAGESIZE is a power of two --
     TODO confirm against its definition). */
  seg = shmget(IPC_PRIVATE,
               (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
               SHM_HUGETLB | IPC_CREAT | 0600);
  if (seg < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(seg, NULL, SHM_RND);
  if ((BLASLONG)mem == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* IPC_RMID is issued right after attaching; the attached address is still
     handed back to the caller. */
  shmctl(seg, IPC_RMID, 0);

  return mem;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *b, *c;
@ -62,6 +133,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
@ -91,7 +163,7 @@ int main(int argc, char *argv[]){
loops = atoi(p);
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -109,18 +181,22 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
begin();
gettimeofday( &start, (struct timezone *)0);
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
end();
timeg += getsec();
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;

View File

@ -25,7 +25,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef GEMV
@ -47,6 +52,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Windows stand-in for gettimeofday(): stores the current system time in *tv
 * as seconds and microseconds relative to the Unix epoch.
 *
 * tv - destination; when NULL nothing is written.
 * tz - ignored; kept so callers that pass a timezone pointer still compile.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz; /* intentionally unused */

  if (NULL != tv)
  {
    GetSystemTimeAsFileTime(&ft);

    /* Join the high and low halves of the FILETIME into one 64-bit count. */
    tmpres   = ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres  |= ft.dwLowDateTime;

    /*converting file time to unix epoch*/
    tmpres /= 10;  /*convert into microseconds*/
    tmpres -= DELTA_EPOCH_IN_MICROSECS;

    /* Split the microsecond count into whole seconds and the remainder. */
    tv->tv_sec  = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Obtain `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB and return the attached address.
 * On any failure a message is printed to stdout and the process exits with
 * status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int   seg;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Requested length is size plus one HUGE_PAGESIZE, masked down to a
     multiple of HUGE_PAGESIZE (assumes HUGE_PAGESIZE is a power of two --
     TODO confirm against its definition). */
  seg = shmget(IPC_PRIVATE,
               (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
               SHM_HUGETLB | IPC_CREAT | 0600);
  if (seg < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(seg, NULL, SHM_RND);
  if ((BLASLONG)mem == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* IPC_RMID is issued right after attaching; the attached address is still
     handed back to the caller. */
  shmctl(seg, IPC_RMID, 0);

  return mem;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *x, *y;
@ -66,6 +137,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
@ -109,7 +181,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -125,7 +197,7 @@ int main(int argc, char *argv[]){
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
for(j = 0; j < m; j++){
for(i = 0; i < n * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[j + i * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -139,10 +211,10 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();
gettimeofday( &start, (struct timezone *)0);
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
end();
time1 = getsec();
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
@ -162,7 +234,7 @@ int main(int argc, char *argv[]){
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
for(j = 0; j < m; j++){
for(i = 0; i < n * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[j + i * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -176,10 +248,10 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();
gettimeofday( &start, (struct timezone *)0);
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
end();
time1 = getsec();
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}

View File

@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef GER
@ -43,6 +49,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Windows stand-in for gettimeofday(): stores the current system time in *tv
 * as seconds and microseconds relative to the Unix epoch.
 *
 * tv - destination; when NULL nothing is written.
 * tz - ignored; kept so callers that pass a timezone pointer still compile.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz; /* intentionally unused */

  if (NULL != tv)
  {
    GetSystemTimeAsFileTime(&ft);

    /* Join the high and low halves of the FILETIME into one 64-bit count. */
    tmpres   = ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres  |= ft.dwLowDateTime;

    /*converting file time to unix epoch*/
    tmpres /= 10;  /*convert into microseconds*/
    tmpres -= DELTA_EPOCH_IN_MICROSECS;

    /* Split the microsecond count into whole seconds and the remainder. */
    tv->tv_sec  = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Obtain `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB and return the attached address.
 * On any failure a message is printed to stdout and the process exits with
 * status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int   seg;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Requested length is size plus one HUGE_PAGESIZE, masked down to a
     multiple of HUGE_PAGESIZE (assumes HUGE_PAGESIZE is a power of two --
     TODO confirm against its definition). */
  seg = shmget(IPC_PRIVATE,
               (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
               SHM_HUGETLB | IPC_CREAT | 0600);
  if (seg < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(seg, NULL, SHM_RND);
  if ((BLASLONG)mem == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* IPC_RMID is issued right after attaching; the attached address is still
     handed back to the caller. */
  shmctl(seg, IPC_RMID, 0);

  return mem;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *x, *y;
@ -59,6 +131,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
@ -92,7 +165,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -109,7 +182,7 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < n * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -125,13 +198,16 @@ int main(int argc, char *argv[]){
for (l=0; l<loops; l++)
{
begin();
gettimeofday( &start, (struct timezone *)0);
GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m);
end();
timeg += getsec();
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;

View File

@ -36,7 +36,12 @@
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
double fabs(double);
@ -61,6 +66,71 @@ double fabs(double);
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Windows stand-in for gettimeofday(): stores the current system time in *tv
 * as seconds and microseconds relative to the Unix epoch.
 *
 * tv - destination; when NULL nothing is written.
 * tz - ignored; kept so callers that pass a timezone pointer still compile.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz; /* intentionally unused */

  if (NULL != tv)
  {
    GetSystemTimeAsFileTime(&ft);

    /* Join the high and low halves of the FILETIME into one 64-bit count. */
    tmpres   = ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres  |= ft.dwLowDateTime;

    /*converting file time to unix epoch*/
    tmpres /= 10;  /*convert into microseconds*/
    tmpres -= DELTA_EPOCH_IN_MICROSECS;

    /* Split the microsecond count into whole seconds and the remainder. */
    tv->tv_sec  = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Obtain `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB and return the attached address.
 * On any failure a message is printed to stdout and the process exits with
 * status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int   seg;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Requested length is size plus one HUGE_PAGESIZE, masked down to a
     multiple of HUGE_PAGESIZE (assumes HUGE_PAGESIZE is a power of two --
     TODO confirm against its definition). */
  seg = shmget(IPC_PRIVATE,
               (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
               SHM_HUGETLB | IPC_CREAT | 0600);
  if (seg < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(seg, NULL, SHM_RND);
  if ((BLASLONG)mem == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* IPC_RMID is issued right after attaching; the attached address is still
     handed back to the caller. */
  shmctl(seg, IPC_RMID, 0);

  return mem;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *b;
@ -72,6 +142,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1;
argc--;argv++;
@ -94,7 +165,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -106,35 +177,39 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
b[(long)i + (long)j * (long)m * COMPSIZE] = 0.0;
b[i + j * m * COMPSIZE] = 0.0;
}
}
for (j = 0; j < m; ++j) {
for (i = 0; i < m * COMPSIZE; ++i) {
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
b[i] += a[i + j * m * COMPSIZE];
}
}
begin();
gettimeofday( &start, (struct timezone *)0);
GESV (&m, &m, a, &m, ipiv, b, &m, &info);
end();
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
fprintf(stderr,
"%10.2f MFlops %10.6f s\n",
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
}
return 0;

View File

@ -36,7 +36,12 @@
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef GETRF
#undef GETRI
@ -67,22 +72,84 @@
extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info);
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Windows stand-in for gettimeofday(): stores the current system time in *tv
 * as seconds and microseconds relative to the Unix epoch.
 *
 * tv - destination; when NULL nothing is written.
 * tz - ignored; kept so callers that pass a timezone pointer still compile.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz; /* intentionally unused */

  if (NULL != tv)
  {
    GetSystemTimeAsFileTime(&ft);

    /* Join the high and low halves of the FILETIME into one 64-bit count. */
    tmpres   = ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres  |= ft.dwLowDateTime;

    /*converting file time to unix epoch*/
    tmpres /= 10;  /*convert into microseconds*/
    tmpres -= DELTA_EPOCH_IN_MICROSECS;

    /* Split the microsecond count into whole seconds and the remainder. */
    tv->tv_sec  = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Obtain `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB and return the attached address.
 * On any failure a message is printed to stdout and the process exits with
 * status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int   seg;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Requested length is size plus one HUGE_PAGESIZE, masked down to a
     multiple of HUGE_PAGESIZE (assumes HUGE_PAGESIZE is a power of two --
     TODO confirm against its definition). */
  seg = shmget(IPC_PRIVATE,
               (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
               SHM_HUGETLB | IPC_CREAT | 0600);
  if (seg < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(seg, NULL, SHM_RND);
  if ((BLASLONG)mem == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* IPC_RMID is issued right after attaching; the attached address is still
     handed back to the caller. */
  shmctl(seg, IPC_RMID, 0);

  return mem;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a,*work;
FLOAT wkopt[4];
blasint *ipiv;
blasint m, i, j, l, info,lwork;
blasint m, i, j, info,lwork;
int from = 1;
int to = 200;
int step = 1;
int loops = 1;
double time1,timeg;
char *p;
char btest = 'I';
struct timeval start, stop;
double time1;
argc--;argv++;
@ -90,9 +157,6 @@ int main(int argc, char *argv[]){
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_TEST"))) btest=*p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);
@ -108,7 +172,7 @@ int main(int argc, char *argv[]){
for(j = 0; j < to; j++){
for(i = 0; i < to * COMPSIZE; i++){
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -124,48 +188,39 @@ int main(int argc, char *argv[]){
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE FLops Time Lwork\n");
for(m = from; m <= to; m += step){
timeg = 0.;
fprintf(stderr, " %6d : ", (int)m);
for (l = 0; l < loops; l++) {
if (btest == 'F') begin();
GETRF (&m, &m, a, &m, ipiv, &info);
if (btest == 'F') {
end();
timeg += getsec();
}
if (info) {
fprintf(stderr, "Matrix is not singular .. %d\n", info);
exit(1);
}
if (btest == 'I') begin();
gettimeofday( &start, (struct timezone *)0);
lwork = -1;
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
lwork = (blasint)wkopt[0];
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
if (btest == 'I') end();
gettimeofday( &stop, (struct timezone *)0);
if (info) {
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
exit(1);
}
if (btest == 'I')
timeg += getsec();
} // loops
time1 = timeg/(double)loops;
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
fprintf(stderr,
" %10.2f MFlops : %10.2f Sec : %d\n",
COMPSIZE * COMPSIZE * (4.0/3.0 * (double)m * (double)m *(double)m - (double)m *(double)m + 5.0/3.0* (double)m) / time1 * 1.e-6,time1,lwork);

View File

@ -1,134 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef HBMV
#ifdef DOUBLE
#define HBMV BLASFUNC(zhbmv)
#else
#define HBMV BLASFUNC(chbmv)
#endif
/*
 * Benchmark driver for HBMV (zhbmv when DOUBLE is defined, chbmv otherwise).
 *
 * Command line: [from [to [step]]] -- range of problem sizes m to time.
 * Environment:  OPENBLAS_LOOPS, OPENBLAS_INCX, OPENBLAS_INCY, OPENBLAS_UPLO
 *               and OPENBLAS_K override the corresponding defaults.
 *
 * For each m the routine is called `loops` times on freshly randomised x and
 * y vectors; the mean time per call is converted to MFlops and written to
 * stderr.  Returns 0, or exits with status 1 if an allocation fails.
 */
int main(int argc, char *argv[]){

  FLOAT *a, *x, *y;
  FLOAT alpha[] = {1.0, 1.0};
  FLOAT beta [] = {0.0, 0.0};
  blasint k = 1;
  char uplo='L';
  blasint m, i, j;
  blasint inc_x=1, inc_y=1;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  double time1,timeg;

  /* Skip the program name, then consume up to three positional arguments. */
  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
  if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
  if ((p = getenv("OPENBLAS_K"))) k = atoi(p);

  fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' k = %d Inc_x = %d Inc_y = %d Loops = %d\n",
          from, to, step, uplo, k, inc_x, inc_y, loops);

  /* Buffers are sized once for the largest problem (`to`) and reused. */
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
    fprintf(stderr,"Out of Memory!!\n");
    exit(1);
  }

  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
    fprintf(stderr,"Out of Memory!!\n");
    exit(1);
  }

  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
    fprintf(stderr,"Out of Memory!!\n");
    exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step) {

    timeg=0;

    fprintf(stderr, " %6dx%d : ", (int)m, (int)m);

    /* Fill the m x m matrix with values in [-0.5, 0.5].  The offset is
       computed in long so that j * m * COMPSIZE is not evaluated in blasint
       width for large m. */
    for(j = 0; j < m; j++) {
      for(i = 0; i < m * COMPSIZE; i++) {
        a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    for (l = 0; l < loops; l++) {

      /* x and y are regenerated before every timed call. */
      for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
        x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
        y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      /* Only the HBMV call itself sits between begin() and end(). */
      begin();
      HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
      end();

      timeg += getsec();
    }

    /* Mean time per call. */
    timeg /= loops;

    fprintf(stderr, " %10.2f MFlops\n",
            COMPSIZE * COMPSIZE * 2. * (double)(2 * k + 1) * (double)m / timeg * 1.e-6);
  }

  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HEMM
@ -35,6 +41,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define HEMM BLASFUNC(chemm)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Windows stand-in for gettimeofday(): stores the current system time in *tv
 * as seconds and microseconds relative to the Unix epoch.
 *
 * tv - destination; when NULL nothing is written.
 * tz - ignored; kept so callers that pass a timezone pointer still compile.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz; /* intentionally unused */

  if (NULL != tv)
  {
    GetSystemTimeAsFileTime(&ft);

    /* Join the high and low halves of the FILETIME into one 64-bit count. */
    tmpres   = ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres  |= ft.dwLowDateTime;

    /*converting file time to unix epoch*/
    tmpres /= 10;  /*convert into microseconds*/
    tmpres -= DELTA_EPOCH_IN_MICROSECS;

    /* Split the microsecond count into whole seconds and the remainder. */
    tv->tv_sec  = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Obtain `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB and return the attached address.
 * On any failure a message is printed to stdout and the process exits with
 * status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int   seg;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Requested length is size plus one HUGE_PAGESIZE, masked down to a
     multiple of HUGE_PAGESIZE (assumes HUGE_PAGESIZE is a power of two --
     TODO confirm against its definition). */
  seg = shmget(IPC_PRIVATE,
               (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
               SHM_HUGETLB | IPC_CREAT | 0600);
  if (seg < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(seg, NULL, SHM_RND);
  if ((BLASLONG)mem == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* IPC_RMID is issued right after attaching; the attached address is still
     handed back to the caller. */
  shmctl(seg, IPC_RMID, 0);

  return mem;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *b, *c;
@ -54,6 +126,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1;
argc--;argv++;
@ -78,7 +151,7 @@ int main(int argc, char *argv[]){
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -91,19 +164,21 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
begin();
gettimeofday( &start, (struct timezone *)0);
HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
end();
gettimeofday( &stop, (struct timezone *)0);
time1 = getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops\n",

View File

@ -25,16 +25,89 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HEMV
#ifdef DOUBLE
#define HEMV BLASFUNC(zhemv)
#else
#define HEMV BLASFUNC(chemv)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Windows stand-in for gettimeofday(): stores the current system time in *tv
 * as seconds and microseconds relative to the Unix epoch.
 *
 * tv - destination; when NULL nothing is written.
 * tz - ignored; kept so callers that pass a timezone pointer still compile.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres;

  (void)tz; /* intentionally unused */

  if (NULL != tv)
  {
    GetSystemTimeAsFileTime(&ft);

    /* Join the high and low halves of the FILETIME into one 64-bit count. */
    tmpres   = ft.dwHighDateTime;
    tmpres <<= 32;
    tmpres  |= ft.dwLowDateTime;

    /*converting file time to unix epoch*/
    tmpres /= 10;  /*convert into microseconds*/
    tmpres -= DELTA_EPOCH_IN_MICROSECS;

    /* Split the microsecond count into whole seconds and the remainder. */
    tv->tv_sec  = (long)(tmpres / 1000000UL);
    tv->tv_usec = (long)(tmpres % 1000000UL);
  }

  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Obtain `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB and return the attached address.
 * On any failure a message is printed to stdout and the process exits with
 * status 1, so the function never returns NULL.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int   seg;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Requested length is size plus one HUGE_PAGESIZE, masked down to a
     multiple of HUGE_PAGESIZE (assumes HUGE_PAGESIZE is a power of two --
     TODO confirm against its definition). */
  seg = shmget(IPC_PRIVATE,
               (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
               SHM_HUGETLB | IPC_CREAT | 0600);
  if (seg < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(seg, NULL, SHM_RND);
  if ((BLASLONG)mem == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* IPC_RMID is issued right after attaching; the attached address is still
     handed back to the caller. */
  shmctl(seg, IPC_RMID, 0);

  return mem;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *x, *y;
@ -51,6 +124,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
@ -78,7 +152,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -93,7 +167,7 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -108,13 +182,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();
gettimeofday( &start, (struct timezone *)0);
HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
end();
gettimeofday( &stop, (struct timezone *)0);
time1 = getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;

View File

@ -1,109 +0,0 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef HER
#ifdef DOUBLE
#define HER BLASFUNC(zher)
#else
#define HER BLASFUNC(cher)
#endif
/*
 * Benchmark driver for the Hermitian rank-1 update selected by the HER
 * macro (cher or zher, depending on DOUBLE).
 *
 * Command line:  [from [to [step]]]  - range of matrix orders to time
 *                (defaults 1, 200, 1).
 * Environment:   OPENBLAS_UPLO  - first character is passed as uplo.
 *                OPENBLAS_TRANS - first character is only echoed in the
 *                                 banner; HER itself is not given it.
 *
 * For every order m the matrix and vector are refilled with values in
 * [-0.5, 0.5], one HER call is timed, and an MFlops figure is written to
 * stderr.  Returns 0; exits with status 1 if an allocation fails.
 */
int main(int argc, char *argv[]){

  FLOAT *a, *x;
  FLOAT alpha[] = {1.0, 1.0};
  blasint incx = 1;
  char *p;

  char uplo='U';
  char trans='N';

  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
  if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;

  blasint m, i, j;

  int from = 1;
  int to = 200;
  int step = 1;

  double time1;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  /* A zero or negative step would keep the size loop below from ever
     reaching `to`. */
  if (step < 1) step = 1;

  fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);

  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    fprintf(stderr, " %6d : ", (int)m);

    for(j = 0; j < m; j++){
      for(i = 0; i < m * COMPSIZE; i++){
        a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      /* Fill every component of element j.  Writing only index
         j * COMPSIZE left the remaining component(s) as whatever malloc
         returned, so the kernel was timed on uninitialised data. */
      for(i = 0; i < COMPSIZE; i++){
        x[(long)i + (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    /* begin()/end()/getsec() come from bench.h; presumably they bracket
       the timed region and report its duration in seconds - confirm
       against bench.h. */
    begin();

    HER (&uplo, &m, alpha, x, &incx, a, &m );

    end();

    time1 = getsec();

    fprintf(stderr,
            " %10.2f MFlops\n",
            COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / time1 * 1.e-6);

  }

  return 0;
}

View File

@ -1,113 +0,0 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef HER2
#ifdef DOUBLE
#define HER2 BLASFUNC(zher2)
#else
#define HER2 BLASFUNC(cher2)
#endif
/*
 * Benchmark driver for the Hermitian rank-2 update selected by the HER2
 * macro (cher2 or zher2, depending on DOUBLE).
 *
 * Command line:  [from [to [step]]]  - range of matrix orders to time
 *                (defaults 1, 200, 1).
 * Environment:   OPENBLAS_UPLO  - first character is passed as uplo.
 *                OPENBLAS_TRANS - first character is only echoed in the
 *                                 banner; HER2 itself is not given it.
 *
 * For every order m the matrix and both vectors are refilled with values
 * in [-0.5, 0.5], one HER2 call is timed, and an MFlops figure is written
 * to stderr.  Returns 0; exits with status 1 if an allocation fails.
 */
int main(int argc, char *argv[]){

  FLOAT *a, *x, *y;
  FLOAT alpha[] = {1.0, 1.0};
  blasint inc = 1;
  char *p;

  char uplo='U';
  char trans='N';

  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
  if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;

  blasint m, i, j;

  int from = 1;
  int to = 200;
  int step = 1;

  double time1;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  /* A zero or negative step would keep the size loop below from ever
     reaching `to`. */
  if (step < 1) step = 1;

  fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);

  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    fprintf(stderr, " %6d : ", (int)m);

    for(j = 0; j < m; j++){
      for(i = 0; i < m * COMPSIZE; i++){
        a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      /* Fill every component of element j in both vectors.  Writing only
         index j * COMPSIZE left the remaining component(s) as whatever
         malloc returned, so the kernel was timed on uninitialised data. */
      for(i = 0; i < COMPSIZE; i++){
        x[(long)i + (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        y[(long)i + (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    /* begin()/end()/getsec() come from bench.h; presumably they bracket
       the timed region and report its duration in seconds - confirm
       against bench.h. */
    begin();

    HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m );

    end();

    time1 = getsec();

    fprintf(stderr,
            " %10.2f MFlops\n",
            COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time1 * 1.e-6);

  }

  return 0;
}

View File

@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HER2K
#ifdef DOUBLE
@ -34,6 +40,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define HER2K BLASFUNC(cher2k)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate at least `size` bytes from a private System V shared-memory
 * segment requested with SHM_HUGETLB.
 *
 * The request is rounded up to a whole multiple of HUGE_PAGESIZE.  On any
 * failure a message is written to stderr and the process exits with
 * status 1, so the function only ever returns a valid mapping.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Keep the historical behaviour of handing out one page for an empty
     request: a zero-sized segment would be rejected by shmget(). */
  if (size < 1) size = 1;

  /* Round up with HUGE_PAGESIZE - 1 so that a request which is already an
     exact multiple of the page size does not consume one extra huge page. */
  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE - 1) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);
  if (shmid < 0) {
    fprintf(stderr, "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* Mark the segment for removal before inspecting the result: if the
     attach failed, exiting first would leave the segment allocated after
     the process is gone. */
  shmctl(shmid, IPC_RMID, 0);

  /* shmat() reports failure as (void *)-1; compare pointers directly
     instead of going through an integer cast. */
  if (address == (void *)-1) {
    fprintf(stderr, "Memory allocation failed(shmat).\n");
    exit(1);
  }

  return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *b, *c;
@ -53,6 +125,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1;
argc--;argv++;
@ -77,7 +150,7 @@ int main(int argc, char *argv[]){
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -90,19 +163,21 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
begin();
gettimeofday( &start, (struct timezone *)0);
HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
end();
gettimeofday( &stop, (struct timezone *)0);
time1 = getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops\n",

View File

@ -25,16 +25,89 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HERK
#ifdef DOUBLE
#define HERK BLASFUNC(zherk)
#else
#define HERK BLASFUNC(cherk)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate at least `size` bytes from a private System V shared-memory
 * segment requested with SHM_HUGETLB.
 *
 * The request is rounded up to a whole multiple of HUGE_PAGESIZE.  On any
 * failure a message is written to stderr and the process exits with
 * status 1, so the function only ever returns a valid mapping.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Keep the historical behaviour of handing out one page for an empty
     request: a zero-sized segment would be rejected by shmget(). */
  if (size < 1) size = 1;

  /* Round up with HUGE_PAGESIZE - 1 so that a request which is already an
     exact multiple of the page size does not consume one extra huge page. */
  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE - 1) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);
  if (shmid < 0) {
    fprintf(stderr, "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* Mark the segment for removal before inspecting the result: if the
     attach failed, exiting first would leave the segment allocated after
     the process is gone. */
  shmctl(shmid, IPC_RMID, 0);

  /* shmat() reports failure as (void *)-1; compare pointers directly
     instead of going through an integer cast. */
  if (address == (void *)-1) {
    fprintf(stderr, "Memory allocation failed(shmat).\n");
    exit(1);
  }

  return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *c;
@ -54,6 +127,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1;
argc--;argv++;
@ -75,7 +149,7 @@ int main(int argc, char *argv[]){
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -88,22 +162,25 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
begin();
gettimeofday( &start, (struct timezone *)0);
HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
end();
gettimeofday( &stop, (struct timezone *)0);
time1 = getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
}
return 0;

View File

@ -1,133 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef HPMV
#ifdef DOUBLE
#define HPMV BLASFUNC(zhpmv)
#else
#define HPMV BLASFUNC(chpmv)
#endif
/*
 * Benchmark driver for the packed Hermitian matrix-vector product
 * selected by the HPMV macro (chpmv or zhpmv, depending on DOUBLE).
 *
 * Command line:  [from [to [step]]]  - range of matrix orders to time
 *                (defaults 1, 200, 1).
 * Environment:   OPENBLAS_LOOPS - repetitions averaged per size (default 1)
 *                OPENBLAS_INCX  - stride passed for x (default 1)
 *                OPENBLAS_INCY  - stride passed for y (default 1)
 *                OPENBLAS_UPLO  - first character is passed as uplo
 *
 * For every order m the matrix is filled once, then each repetition
 * refills x and y, times one HPMV call and accumulates the elapsed time;
 * the mean is reported as MFlops on stderr.  Returns 0; exits with
 * status 1 if an allocation fails.
 */
int main(int argc, char *argv[]){

  FLOAT *a, *x, *y;
  FLOAT alpha[] = {1.0, 1.0};
  FLOAT beta [] = {1.0, 1.0};
  char uplo='L';
  blasint m, i, j;
  blasint inc_x=1, inc_y=1;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  double time1,timeg;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
  if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
  if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;

  /* A non-positive step would never reach `to`, and a non-positive loop
     count would skip the kernel and divide the accumulated time by zero
     (or a negative number) below. */
  if (step < 1) step = 1;
  if (loops < 1) loops = 1;

  /* blasint may be wider than int, so cast explicitly for %d. */
  fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,(int)inc_x,(int)inc_y,loops);

  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
    fprintf(stderr,"Out of Memory!!\n");
    exit(1);
  }

  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
    fprintf(stderr,"Out of Memory!!\n");
    exit(1);
  }

  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
    fprintf(stderr,"Out of Memory!!\n");
    exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step) {

    timeg=0;

    fprintf(stderr, " %6dx%d : ", (int)m, (int)m);

    for(j = 0; j < m; j++) {
      for(i = 0; i < m * COMPSIZE; i++) {
        /* Widen before multiplying so the flat index cannot wrap when
           blasint is a 32-bit type and m is large. */
        a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    for (l = 0; l < loops; l++) {

      for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
        x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
        y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      /* begin()/end()/getsec() come from bench.h; presumably they bracket
         the timed region and report its duration in seconds - confirm
         against bench.h. */
      begin();

      HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );

      end();

      time1 = getsec();

      timeg += time1;

    }

    timeg /= loops;

    fprintf(stderr, " %10.2f MFlops\n",
            COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
  }

  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef IAMAX
@ -43,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate at least `size` bytes from a private System V shared-memory
 * segment requested with SHM_HUGETLB.
 *
 * The request is rounded up to a whole multiple of HUGE_PAGESIZE.  On any
 * failure a message is written to stderr and the process exits with
 * status 1, so the function only ever returns a valid mapping.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Keep the historical behaviour of handing out one page for an empty
     request: a zero-sized segment would be rejected by shmget(). */
  if (size < 1) size = 1;

  /* Round up with HUGE_PAGESIZE - 1 so that a request which is already an
     exact multiple of the page size does not consume one extra huge page. */
  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE - 1) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);
  if (shmid < 0) {
    fprintf(stderr, "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* Mark the segment for removal before inspecting the result: if the
     attach failed, exiting first would leave the segment allocated after
     the process is gone. */
  shmctl(shmid, IPC_RMID, 0);

  /* shmat() reports failure as (void *)-1; compare pointers directly
     instead of going through an integer cast. */
  if (address == (void *)-1) {
    fprintf(stderr, "Memory allocation failed(shmat).\n");
    exit(1);
  }

  return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
@ -56,6 +127,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
@ -73,7 +145,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -94,13 +166,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();
gettimeofday( &start, (struct timezone *)0);
IAMAX (&m, x, &inc_x);
end();
gettimeofday( &stop, (struct timezone *)0);
time1 = getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
@ -109,7 +181,7 @@ int main(int argc, char *argv[]){
timeg /= loops;
fprintf(stderr,
" %10.2f MBytes %10.6f sec\n",
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}

View File

@ -1,120 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef IAMIN
#ifdef COMPLEX
#ifdef DOUBLE
#define IAMIN BLASFUNC(izamin)
#else
#define IAMIN BLASFUNC(icamin)
#endif
#else
#ifdef DOUBLE
#define IAMIN BLASFUNC(idamin)
#else
#define IAMIN BLASFUNC(isamin)
#endif
#endif
/*
 * Benchmark driver for the routine selected by the IAMIN macro
 * (isamin, idamin, icamin or izamin, depending on COMPLEX and DOUBLE).
 *
 * Command line:  [from [to [step]]]  - range of vector lengths to time
 *                (defaults 1, 200, 1).
 * Environment:   OPENBLAS_LOOPS - repetitions averaged per size (default 1)
 *                OPENBLAS_INCX  - stride passed for x (default 1)
 *
 * For every length m each repetition refills x with values in
 * [-0.5, 0.5], times one IAMIN call and accumulates the elapsed time; the
 * mean is reported on stderr as a data rate plus seconds.  Returns 0;
 * exits with status 1 if the allocation fails.
 */
int main(int argc, char *argv[]){

  FLOAT *x;
  blasint m, i;
  blasint inc_x=1;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  double time1,timeg;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

  /* A non-positive step would never reach `to`, and a non-positive loop
     count would skip the kernel and divide the accumulated time by zero
     (or a negative number) below. */
  if (step < 1) step = 1;
  if (loops < 1) loops = 1;

  /* blasint may be wider than int, so cast explicitly for %d. */
  fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,(int)inc_x,loops);

  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    timeg=0;

    fprintf(stderr, " %6d : ", (int)m);

    for (l=0; l<loops; l++)
    {

      for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
        x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      /* begin()/end()/getsec() come from bench.h; presumably they bracket
         the timed region and report its duration in seconds - confirm
         against bench.h. */
      begin();

      IAMIN (&m, x, &inc_x);

      end();

      time1 = getsec();

      timeg += time1;

    }

    timeg /= loops;

    /* The figure is COMPSIZE * sizeof(FLOAT) * m bytes per second scaled
       by 1e-6, i.e. a bandwidth, so label it MBytes rather than MFlops. */
    fprintf(stderr,
            " %10.2f MBytes %10.6f sec\n",
            COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);

  }

  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,114 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef IMAX
#ifndef COMPLEX
#ifdef DOUBLE
#define IMAX BLASFUNC(idmax)
#else
#define IMAX BLASFUNC(ismax)
#endif
#endif
/*
 * Benchmark driver for the routine selected by the IMAX macro
 * (ismax or idmax, depending on DOUBLE; the macro is only defined when
 * COMPLEX is not).
 *
 * Command line:  [from [to [step]]]  - range of vector lengths to time
 *                (defaults 1, 200, 1).
 * Environment:   OPENBLAS_LOOPS - repetitions averaged per size (default 1)
 *                OPENBLAS_INCX  - stride passed for x (default 1)
 *
 * For every length m each repetition refills x with values in
 * [-0.5, 0.5], times one IMAX call and accumulates the elapsed time; the
 * mean is reported on stderr as a data rate plus seconds.  Returns 0;
 * exits with status 1 if the allocation fails.
 */
int main(int argc, char *argv[]){

  FLOAT *x;
  blasint m, i;
  blasint inc_x=1;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  double time1,timeg;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

  /* A non-positive step would never reach `to`, and a non-positive loop
     count would skip the kernel and divide the accumulated time by zero
     (or a negative number) below. */
  if (step < 1) step = 1;
  if (loops < 1) loops = 1;

  /* blasint may be wider than int, so cast explicitly for %d. */
  fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,(int)inc_x,loops);

  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    timeg=0;

    fprintf(stderr, " %6d : ", (int)m);

    for (l=0; l<loops; l++)
    {

      for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
        x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      /* begin()/end()/getsec() come from bench.h; presumably they bracket
         the timed region and report its duration in seconds - confirm
         against bench.h. */
      begin();

      IMAX (&m, x, &inc_x);

      end();

      time1 = getsec();

      timeg += time1;

    }

    timeg /= loops;

    /* The figure is COMPSIZE * sizeof(FLOAT) * m bytes per second scaled
       by 1e-6, i.e. a bandwidth, so label it MBytes rather than MFlops. */
    fprintf(stderr,
            " %10.2f MBytes %10.6f sec\n",
            COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);

  }

  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,114 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef IMIN
#ifndef COMPLEX
#ifdef DOUBLE
#define IMIN BLASFUNC(idmin)
#else
#define IMIN BLASFUNC(ismin)
#endif
#endif
/*
 * Benchmark driver for the routine selected by the IMIN macro
 * (ismin or idmin, depending on DOUBLE; the macro is only defined when
 * COMPLEX is not).
 *
 * Command line:  [from [to [step]]]  - range of vector lengths to time
 *                (defaults 1, 200, 1).
 * Environment:   OPENBLAS_LOOPS - repetitions averaged per size (default 1)
 *                OPENBLAS_INCX  - stride passed for x (default 1)
 *
 * For every length m each repetition refills x with values in
 * [-0.5, 0.5], times one IMIN call and accumulates the elapsed time; the
 * mean is reported on stderr as a data rate plus seconds.  Returns 0;
 * exits with status 1 if the allocation fails.
 */
int main(int argc, char *argv[]){

  FLOAT *x;
  blasint m, i;
  blasint inc_x=1;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  double time1,timeg;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

  /* A non-positive step would never reach `to`, and a non-positive loop
     count would skip the kernel and divide the accumulated time by zero
     (or a negative number) below. */
  if (step < 1) step = 1;
  if (loops < 1) loops = 1;

  /* blasint may be wider than int, so cast explicitly for %d. */
  fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,(int)inc_x,loops);

  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    timeg=0;

    fprintf(stderr, " %6d : ", (int)m);

    for (l=0; l<loops; l++)
    {

      for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
        x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      /* begin()/end()/getsec() come from bench.h; presumably they bracket
         the timed region and report its duration in seconds - confirm
         against bench.h. */
      begin();

      IMIN (&m, x, &inc_x);

      end();

      time1 = getsec();

      timeg += time1;

    }

    timeg /= loops;

    /* The figure is COMPSIZE * sizeof(FLOAT) * m bytes per second scaled
       by 1e-6, i.e. a bandwidth, so label it MBytes rather than MFlops. */
    fprintf(stderr,
            " %10.2f MBytes %10.6f sec\n",
            COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);

  }

  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -36,7 +36,12 @@
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
double fabs(double);
@ -67,26 +72,88 @@ double fabs(double);
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate at least `size` bytes from a private System V shared-memory
 * segment requested with SHM_HUGETLB.
 *
 * The request is rounded up to a whole multiple of HUGE_PAGESIZE.  On any
 * failure a message is written to stderr and the process exits with
 * status 1, so the function only ever returns a valid mapping.
 */
static void *huge_malloc(BLASLONG size){
  int shmid;
  void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  /* Keep the historical behaviour of handing out one page for an empty
     request: a zero-sized segment would be rejected by shmget(). */
  if (size < 1) size = 1;

  /* Round up with HUGE_PAGESIZE - 1 so that a request which is already an
     exact multiple of the page size does not consume one extra huge page. */
  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE - 1) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);
  if (shmid < 0) {
    fprintf(stderr, "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* Mark the segment for removal before inspecting the result: if the
     attach failed, exiting first would leave the segment allocated after
     the process is gone. */
  shmctl(shmid, IPC_RMID, 0);

  /* shmat() reports failure as (void *)-1; compare pointers directly
     instead of going through an integer cast. */
  if (address == (void *)-1) {
    fprintf(stderr, "Memory allocation failed(shmat).\n");
    exit(1);
  }

  return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *b;
blasint *ipiv;
blasint m, i, j, l, info;
blasint m, i, j, info;
blasint unit = 1;
int from = 1;
int to = 200;
int step = 1;
int loops = 1;
FLOAT maxerr;
double time1, time2, timeg1,timeg2;
struct timeval start, stop;
double time1, time2;
char *p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
@ -107,19 +174,19 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Residual Decompose Solve Total\n");
for(m = from; m <= to; m += step){
timeg1 = timeg2 = 0.;
fprintf(stderr, " %6d : ", (int)m);
for (l = 0; l < loops; l++) {
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -127,38 +194,36 @@ int main(int argc, char *argv[]){
for (j = 0; j < m; ++j) {
for (i = 0; i < m * COMPSIZE; ++i) {
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
b[i] += a[i + j * m * COMPSIZE];
}
}
begin();
gettimeofday( &start, (struct timezone *)0);
GETRF (&m, &m, a, &m, ipiv, &info);
end();
gettimeofday( &stop, (struct timezone *)0);
if (info) {
fprintf(stderr, "Matrix is not singular .. %d\n", info);
exit(1);
}
timeg1 += getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
begin();
gettimeofday( &start, (struct timezone *)0);
GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info);
end();
gettimeofday( &stop, (struct timezone *)0);
if (info) {
fprintf(stderr, "Matrix is not singular .. %d\n", info);
exit(1);
}
timeg2 += getsec();
} //loops
time1=timeg1/(double)loops;
time2=timeg2/(double)loops;
time2 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
maxerr = 0.;
for(i = 0; i < m; i++){

View File

@ -1,113 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef NAMAX
#ifndef COMPLEX
#ifdef DOUBLE
#define NAMAX BLASFUNC(dmax)
#else
#define NAMAX BLASFUNC(smax)
#endif
#endif
/*
 * Benchmark driver for the NAMAX kernel (BLASFUNC(dmax) or BLASFUNC(smax),
 * selected by the DOUBLE macro above).
 *
 * Command line:  [from [to [step]]]  - range of vector lengths to time.
 * Environment:   OPENBLAS_LOOPS      - repetitions averaged per length,
 *                OPENBLAS_INCX       - stride handed to the kernel.
 *
 * One line per vector length is written to stderr. Returns 0 on success and
 * exits with status 1 when the work buffer cannot be allocated.
 */
int main(int argc, char *argv[]){

  FLOAT *x;
  blasint m, i;
  blasint inc_x=1;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  double time1,timeg;

  /* Drop the program name; what remains is from / to / step. */
  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

  /* inc_x is a blasint; cast it for %d the same way m is cast further down. */
  fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,(int)inc_x,loops);

  /* A single buffer sized for the largest length and stride is reused throughout. */
  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {
    timeg=0;

    fprintf(stderr, " %6d : ", (int)m);

    for (l=0; l<loops; l++)
    {
      /* Refill the input with values in [-0.5, 0.5] before every timed call. */
      for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
        x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      begin();
      NAMAX (&m, x, &inc_x);
      end();

      time1 = getsec();
      timeg += time1;
    }

    /* Mean time per call over all repetitions. */
    timeg /= loops;

    /* The reported rate is COMPSIZE * sizeof(FLOAT) * m per second, scaled by 1e-6. */
    fprintf(stderr,
            " %10.2f MFlops %10.6f sec\n",
            COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
  }

  /* Release the work buffer instead of leaving it to process teardown. */
  free(x);

  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,113 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef NAMIN
#ifndef COMPLEX
#ifdef DOUBLE
#define NAMIN BLASFUNC(dmin)
#else
#define NAMIN BLASFUNC(smin)
#endif
#endif
/*
 * Benchmark driver for the NAMIN kernel (BLASFUNC(dmin) or BLASFUNC(smin),
 * selected by the DOUBLE macro above).
 *
 * Command line:  [from [to [step]]]  - range of vector lengths to time.
 * Environment:   OPENBLAS_LOOPS      - repetitions averaged per length,
 *                OPENBLAS_INCX       - stride handed to the kernel.
 *
 * One line per vector length is written to stderr. Returns 0 on success and
 * exits with status 1 when the work buffer cannot be allocated.
 */
int main(int argc, char *argv[]){

  FLOAT *x;
  blasint m, i;
  blasint inc_x=1;
  int loops = 1;
  int l;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;

  double time1,timeg;

  /* Drop the program name; what remains is from / to / step. */
  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

  /* inc_x is a blasint; cast it for %d the same way m is cast further down. */
  fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,(int)inc_x,loops);

  /* A single buffer sized for the largest length and stride is reused throughout. */
  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {
    timeg=0;

    fprintf(stderr, " %6d : ", (int)m);

    for (l=0; l<loops; l++)
    {
      /* Refill the input with values in [-0.5, 0.5] before every timed call. */
      for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
        x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      begin();
      NAMIN (&m, x, &inc_x);
      end();

      time1 = getsec();
      timeg += time1;
    }

    /* Mean time per call over all repetitions. */
    timeg /= loops;

    /* The reported rate is COMPSIZE * sizeof(FLOAT) * m per second, scaled by 1e-6. */
    fprintf(stderr,
            " %10.2f MFlops %10.6f sec\n",
            COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
  }

  /* Release the work buffer instead of leaving it to process teardown. */
  free(x);

  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef NRM2
@ -43,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Replacement for gettimeofday() on Windows builds (this definition is only
 * compiled when __WIN32__ or __WIN64__ is defined).
 *
 * tv: receives the current time as seconds / microseconds; may be NULL, in
 *     which case nothing is written.
 * tz: accepted for signature compatibility only; never read.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
/* NOTE(review): tzflag is declared but never used in this function. */
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
/* Assemble the two 32-bit halves of the FILETIME into one 64-bit value. */
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
/* Split the microsecond count into whole seconds and the remainder. */
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate `size` bytes from a System V shared-memory segment requested with
 * SHM_HUGETLB, and return the attached address.
 *
 * Prints a message and exits with status 1 if shmget() or shmat() fails, so
 * the function never returns a failure value to the caller.
 *
 * NOTE(review): the enclosing #if ends in "&& 0", so this definition is
 * currently compiled out.
 */
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
/* Fallback value for headers that do not define SHM_HUGETLB. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
/* The requested length is size + HUGE_PAGESIZE with the low bits masked off,
   i.e. a multiple of HUGE_PAGESIZE (assuming HUGE_PAGESIZE is a power of two
   - TODO confirm). */
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
/* Mark the segment for removal right away; the attached mapping is what the
   caller keeps using. */
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
@ -56,6 +127,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
@ -73,7 +145,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -94,13 +166,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();
gettimeofday( &start, (struct timezone *)0);
NRM2 (&m, x, &inc_x);
end();
gettimeofday( &stop, (struct timezone *)0);
time1 = getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;

View File

@ -1,122 +0,0 @@
/***************************************************************************
Copyright (c) 2024, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef OMATCOPY
#ifndef COMPLEX
#ifdef DOUBLE
#define OMATCOPY BLASFUNC(domatcopy)
#else
#define OMATCOPY BLASFUNC(somatcopy)
#endif
#else
#ifdef DOUBLE
#define OMATCOPY BLASFUNC(zomatcopy)
#else
#define OMATCOPY BLASFUNC(comatcopy)
#endif
#endif
/*
 * Benchmark driver for OMATCOPY (s/d/c/z variant selected by the COMPLEX and
 * DOUBLE macros above).
 *
 * Command line:  [from [to [step]]]  - range of square matrix sizes to time.
 * Environment:   OPENBLAS_TRANS      - first character is used as the trans argument,
 *                OPENBLAS_ORDER      - first character is used as the order argument,
 *                OPENBLAS_LOOPS      - number of calls timed per size.
 *
 * One line per size is written to stderr. Returns 0 on success and exits
 * with status 1 when a buffer cannot be allocated.
 */
int main(int argc, char *argv[]){

  FLOAT *a, *b;
  FLOAT alpha[] = {1.0, 0.0};
  char trans = 'N';
  char order = 'C';
  blasint crows, ccols, clda, cldb;
  int loops = 1;
  char *p;

  int from = 1;
  int to = 200;
  int step = 1;
  int i, j;
  size_t k, nelem;

  double time1, timeg;

  /* Drop the program name; what remains is from / to / step. */
  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++; }
  if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; }
  if (argc > 0) { step = atol(*argv); argc--; argv++; }

  if ((p = getenv("OPENBLAS_TRANS"))) {
    trans=*p;
  }
  if ((p = getenv("OPENBLAS_ORDER"))) {
    order=*p;
  }
  TOUPPER(trans);
  TOUPPER(order);
  fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c : Order=%c\n", from, to, step, trans, order);

  p = getenv("OPENBLAS_LOOPS");
  if ( p != NULL ) {
    loops = atoi(p);
  }

  /*
   * Element count of the largest matrix. It is computed in size_t because
   * to * to * COMPSIZE overflows a plain int once `to` gets large, which
   * would make the fill loops below misbehave.
   */
  nelem = (size_t)to * (size_t)to * COMPSIZE;

  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * nelem)) == NULL) {
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }
  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * nelem)) == NULL) {
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  /* Both buffers are filled once, with values in [-0.5, 0.5]. */
  for (k = 0; k < nelem; k++) {
    a[k] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
  }
  for (k = 0; k < nelem; k++) {
    b[k] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
  }

  fprintf(stderr, " SIZE Flops Time\n");

  for (i = from; i <= to; i += step) {
    /* Square problem: rows, columns and both leading dimensions are all i. */
    cldb = clda = crows = ccols = i;
    fprintf(stderr, " ROWS=%4d, COLS=%4d : ", (int)crows, (int)ccols);

    /* All `loops` calls are timed together as one interval. */
    begin();
    for (j=0; j<loops; j++) {
      OMATCOPY (&order, &trans, &crows, &ccols, alpha, a, &clda, b, &cldb);
    }
    end();

    time1 = getsec();
    timeg = time1/loops;

    /* The rate uses the per-call time; the seconds column is the total for all loops. */
    fprintf(stderr,
            " %10.2f MFlops %10.6f sec\n",
            COMPSIZE * COMPSIZE * (double)ccols * (double)crows / timeg * 1.e-6, time1);
  }

  free(a);
  free(b);

  return 0;
}

View File

@ -36,7 +36,12 @@
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
double fabs(double);
@ -81,7 +86,37 @@ double fabs(double);
// extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info);
// extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info);
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Replacement for gettimeofday() on Windows builds (this definition is only
 * compiled when __WIN32__ or __WIN64__ is defined).
 *
 * tv: receives the current time as seconds / microseconds; may be NULL, in
 *     which case nothing is written.
 * tz: accepted for signature compatibility only; never read.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
/* NOTE(review): tzflag is declared but never used in this function. */
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
/* Assemble the two 32-bit halves of the FILETIME into one 64-bit value. */
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
/* Split the microsecond count into whole seconds and the remainder. */
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
int main(int argc, char *argv[]){
@ -99,15 +134,15 @@ int main(int argc, char *argv[]){
char *p;
char btest = 'F';
blasint m, i, j, l, info, uplos=0;
double flops = 0.;
blasint m, i, j, info, uplos=0;
double flops;
int from = 1;
int to = 200;
int step = 1;
int loops = 1;
double time1, timeg;
struct timeval start, stop;
double time1;
argc--;argv++;
@ -120,8 +155,6 @@ int main(int argc, char *argv[]){
if ((p = getenv("OPENBLAS_TEST"))) btest=*p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c\n", from, to, step,*uplo[uplos]);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
@ -132,53 +165,51 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
for(m = from; m <= to; m += step){
timeg=0.;
for (l = 0; l < loops; l++) {
#ifndef COMPLEX
if (uplos & 1) {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.;
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
for(i = 0; i < j; i++) a[i + j * m] = 0.;
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
} else {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.;
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
}
}
#else
if (uplos & 1) {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) {
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
a[(i + j * m) * 2 + 0] = 0.;
a[(i + j * m) * 2 + 1] = 0.;
}
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[(j + j * m) * 2 + 1] = 0.;
for(i = j + 1; i < m; i++) {
a[((long)i + (long)j * (long)m) * 2 + 0] = 0;
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
}
} else {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) {
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[(j + j * m) * 2 + 1] = 0.;
for(i = j + 1; i < m; i++) {
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
a[(i + j * m) * 2 + 0] = 0.;
a[(i + j * m) * 2 + 1] = 0.;
}
}
}
@ -186,19 +217,19 @@ int main(int argc, char *argv[]){
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
begin();
gettimeofday( &start, (struct timezone *)0);
POTRF(uplo[uplos], &m, b, &m, &info);
end();
gettimeofday( &stop, (struct timezone *)0);
if (info != 0) {
fprintf(stderr, "Potrf info = %d\n", info);
exit(1);
}
if ( btest == 'F')
timeg += getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
if ( btest == 'S' )
{
@ -209,43 +240,39 @@ int main(int argc, char *argv[]){
}
}
begin();
gettimeofday( &start, (struct timezone *)0);
POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info);
end();
gettimeofday( &stop, (struct timezone *)0);
if (info != 0) {
fprintf(stderr, "Potrs info = %d\n", info);
exit(1);
}
timeg += getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
}
if ( btest == 'I' )
{
begin();
gettimeofday( &start, (struct timezone *)0);
POTRI(uplo[uplos], &m, b, &m, &info);
end();
gettimeofday( &stop, (struct timezone *)0);
if (info != 0) {
fprintf(stderr, "Potri info = %d\n", info);
exit(1);
}
timeg += getsec();
}
} // loops
time1 = timeg/(double)loops;
if ( btest == 'F')
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
if ( btest == 'S')
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
if ( btest == 'I')
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
}
fprintf(stderr, "%8d : %10.2f MFlops : %10.3f Sec : Test=%c\n",m,flops ,time1,btest);

View File

@ -1,49 +0,0 @@
# Continuous benchmarking of OpenBLAS performance
We run a set of benchmarks covering a subset of OpenBLAS functionality.
## Benchmark runner
[![CodSpeed Badge](https://img.shields.io/endpoint?url=https://codspeed.io/badge.json)](https://codspeed.io/OpenMathLib/OpenBLAS/)
Click on [benchmarks](https://codspeed.io/OpenMathLib/OpenBLAS/benchmarks) to see the performance of a particular benchmark over time;
Click on [branches](https://codspeed.io/OpenMathLib/OpenBLAS/branches/) and then on the last PR link to see the flamegraphs.
## What are the benchmarks
We run raw BLAS/LAPACK subroutines, via f2py-generated python wrappers. The wrappers themselves are equivalent to [those from SciPy](https://docs.scipy.org/doc/scipy/reference/linalg.lapack.html).
In fact, the wrappers _are_ from SciPy, we take a small subset simply to avoid having to build the whole SciPy for each CI run.
## Adding a new benchmark
`.github/workflows/codspeed-bench.yml` does all the orchestration on CI.
Benchmarks live in the `benchmark/pybench` directory. It is organized as follows:
- benchmarks themselves live in the `benchmarks` folder. Note that the LAPACK routines are imported from the `openblas_wrap` package.
- the `openblas_wrap` package is a simple trampoline: it contains an f2py extension, `_flapack`, which talks to OpenBLAS, and exports the python names in its `__init__.py`.
This way, the `openblas_wrap` package shields the benchmarks from the details of where a particular LAPACK function comes from. If desired, you can, for instance, swap the `_flapack` extension for
`scipy.linalg.blas` and `scipy.linalg.lapack`.
To change parameters of an existing benchmark, edit python files in the `benchmark/pybench/benchmarks` directory.
To add a benchmark for a new BLAS or LAPACK function, you need to:
- add an f2py wrapper for the bare LAPACK function. You can simply copy a wrapper from SciPy (look for `*.pyf.src` files in https://github.com/scipy/scipy/tree/main/scipy/linalg)
- add an import to `benchmark/pybench/openblas_wrap/__init__.py`
## Running benchmarks locally
This benchmarking layer is orchestrated from Python, so you will need
everything it takes to build OpenBLAS from source, plus `python` and the following packages:
```
$ python -mpip install numpy meson ninja pytest pytest-benchmark
```
The benchmark syntax is consistent with that of `pytest-benchmark` framework. The incantation to run the suite locally is `$ pytest benchmark/pybench/benchmarks/test_blas.py`.
An ASV compatible benchmark suite is planned but currently not implemented.

View File

@ -1,274 +0,0 @@
import pytest
import numpy as np
import openblas_wrap as ow
dtype_map = {
's': np.float32,
'd': np.float64,
'c': np.complex64,
'z': np.complex128,
'dz': np.complex128,
}
# ### BLAS level 1 ###
# dnrm2
dnrm2_sizes = [100, 1000]
def run_dnrm2(n, x, incx, func):
    """Call ``func(x, n, incx=incx)`` and hand back whatever it returns."""
    return func(x, n, incx=incx)
@pytest.mark.parametrize('variant', ['d', 'dz'])
@pytest.mark.parametrize('n', dnrm2_sizes)
def test_nrm2(benchmark, n, variant):
    """Benchmark the nrm2 wrapper on a random length-``n`` vector with unit stride."""
    rng = np.random.RandomState(1234)
    elem_type = dtype_map[variant]
    x = np.array(rng.uniform(size=(n,)), dtype=elem_type)
    # The wrapper is looked up once, outside the timed call.
    benchmark(run_dnrm2, n, x, 1, ow.get_func('nrm2', variant))
# ddot
ddot_sizes = [100, 1000]
def run_ddot(x, y, func):
    """Call ``func(x, y)`` and hand back whatever it returns."""
    return func(x, y)
@pytest.mark.parametrize('n', ddot_sizes)
def test_dot(benchmark, n):
    """Benchmark the double-precision dot wrapper on two random length-``n`` vectors."""
    rng = np.random.RandomState(1234)
    # x is drawn before y so the random stream is consumed in a fixed order.
    x = np.array(rng.uniform(size=(n,)), dtype=float)
    y = np.array(rng.uniform(size=(n,)), dtype=float)
    benchmark(run_ddot, x, y, ow.get_func('dot', 'd'))
# daxpy
daxpy_sizes = [100, 1000]
def run_daxpy(x, y, func):
    """Call ``func(x, y, a=2.0)`` and hand back whatever it returns."""
    return func(x, y, a=2.0)
@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z'])
@pytest.mark.parametrize('n', daxpy_sizes)
def test_daxpy(benchmark, n, variant):
    """Benchmark the axpy wrapper for one dtype variant on random length-``n`` vectors."""
    rng = np.random.RandomState(1234)
    elem_type = dtype_map[variant]
    # x is drawn before y so the random stream is consumed in a fixed order.
    x = np.array(rng.uniform(size=(n,)), dtype=elem_type)
    y = np.array(rng.uniform(size=(n,)), dtype=elem_type)
    benchmark(run_daxpy, x, y, ow.get_func('axpy', variant))
# ### BLAS level 2 ###
gemv_sizes = [100, 1000]
def run_gemv(a, x, y, func):
    """Call ``func(1.0, a, x, y=y, overwrite_y=True)`` and return its result."""
    return func(1.0, a, x, y=y, overwrite_y=True)
@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z'])
@pytest.mark.parametrize('n', gemv_sizes)
def test_dgemv(benchmark, n, variant):
    """Benchmark the gemv wrapper for one dtype variant on an ``n`` x ``n`` matrix.

    The operands are built exactly once. The matrix is stored in Fortran
    order, like the matrices in the other level-2/3 benchmarks of this file,
    so the wrapper is not timed on a layout conversion of ``a``.
    """
    rndm = np.random.RandomState(1234)
    dtyp = dtype_map[variant]

    a = np.array(rndm.uniform(size=(n, n)), dtype=dtyp, order='F')
    x = np.array(rndm.uniform(size=(n,)), dtype=dtyp)
    y = np.zeros(n, dtype=dtyp)

    gemv = ow.get_func('gemv', variant)
    result = benchmark(run_gemv, a, x, y, gemv)

    # overwrite_y=True: the wrapper is expected to hand back the very same array.
    assert result is y
# dgbmv
dgbmv_sizes = [100, 1000]
def run_gbmv(m, n, kl, ku, a, x, y, func):
    """Call ``func(m, n, kl, ku, 1.0, a, x, y=y, overwrite_y=True)`` and return its result."""
    return func(m, n, kl, ku, 1.0, a, x, y=y, overwrite_y=True)
@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z'])
@pytest.mark.parametrize('n', dgbmv_sizes)
@pytest.mark.parametrize('kl', [1])
def test_dgbmv(benchmark, n, kl, variant):
    """Benchmark the gbmv wrapper with ``kl`` passed for both bandwidth arguments."""
    rng = np.random.RandomState(1234)
    elem_type = dtype_map[variant]

    x = np.array(rng.uniform(size=(n,)), dtype=elem_type)
    y = np.empty(n, dtype=elem_type)

    # Band storage array: 2*kl + 1 rows by n columns, Fortran-ordered.
    band = np.array(rng.uniform(size=(2*kl + 1, n)), dtype=elem_type, order='F')

    gbmv = ow.get_func('gbmv', variant)
    # The row count handed to the wrapper equals n.
    out = benchmark(run_gbmv, n, n, kl, kl, band, x, y, gbmv)
    assert out is y
# ### BLAS level 3 ###
# dgemm
gemm_sizes = [100, 1000]
def run_gemm(a, b, c, func):
    """Call ``func(1.0, a, b, c=c, overwrite_c=True)`` and return its result."""
    return func(1.0, a, b, c=c, overwrite_c=True)
@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z'])
@pytest.mark.parametrize('n', gemm_sizes)
def test_gemm(benchmark, n, variant):
    """Benchmark the gemm wrapper on two random Fortran-ordered ``n`` x ``n`` matrices."""
    rng = np.random.RandomState(1234)
    elem_type = dtype_map[variant]

    # a is drawn before b so the random stream is consumed in a fixed order.
    a = np.array(rng.uniform(size=(n, n)), dtype=elem_type, order='F')
    b = np.array(rng.uniform(size=(n, n)), dtype=elem_type, order='F')
    c = np.empty((n, n), dtype=elem_type, order='F')

    out = benchmark(run_gemm, a, b, c, ow.get_func('gemm', variant))
    assert out is c
# dsyrk
syrk_sizes = [100, 1000]
def run_syrk(a, c, func):
    """Call ``func(1.0, a, c=c, overwrite_c=True)`` and return its result."""
    return func(1.0, a, c=c, overwrite_c=True)
@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z'])
@pytest.mark.parametrize('n', syrk_sizes)
def test_syrk(benchmark, n, variant):
    """Benchmark the syrk wrapper on a random Fortran-ordered ``n`` x ``n`` matrix."""
    rng = np.random.RandomState(1234)
    elem_type = dtype_map[variant]

    a = np.array(rng.uniform(size=(n, n)), dtype=elem_type, order='F')
    c = np.empty((n, n), dtype=elem_type, order='F')

    out = benchmark(run_syrk, a, c, ow.get_func('syrk', variant))
    assert out is c
# ### LAPACK ###
# linalg.solve
gesv_sizes = [100, 1000]
def run_gesv(a, b, func):
    """Call ``func(a, b, overwrite_a=True, overwrite_b=True)`` and return its result."""
    return func(a, b, overwrite_a=True, overwrite_b=True)
@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z'])
@pytest.mark.parametrize('n', gesv_sizes)
def test_gesv(benchmark, n, variant):
    """Benchmark the gesv wrapper on a random ``n`` x ``n`` system with one right-hand side."""
    rng = np.random.RandomState(1234)
    elem_type = dtype_map[variant]

    # Random matrix plus the identity, both Fortran-ordered.
    noise = np.array(rng.uniform(size=(n, n)), dtype=elem_type, order='F')
    a = noise + np.eye(n, dtype=elem_type, order='F')
    b = np.array(rng.uniform(size=(n, 1)), dtype=elem_type, order='F')

    gesv = ow.get_func('gesv', variant)
    lu, piv, x, info = benchmark(run_gesv, a, b, gesv)

    # With both overwrite flags set the outputs must alias the inputs.
    assert lu is a
    assert x is b
    assert info == 0
# linalg.svd
gesdd_sizes = [(100, 5), (1000, 222)]
def run_gesdd(a, lwork, func):
    """Call ``func`` on ``a`` with the given ``lwork``, ``full_matrices=False`` and ``overwrite_a=False``."""
    return func(a, lwork=lwork, full_matrices=False, overwrite_a=False)
@pytest.mark.parametrize('variant', ['s', 'd'])
@pytest.mark.parametrize('mn', gesdd_sizes)
def test_gesdd(benchmark, mn, variant):
    """Benchmark the gesdd wrapper and check that its factors reproduce the input."""
    m, n = mn
    rng = np.random.RandomState(1234)
    elem_type = dtype_map[variant]

    a = np.array(rng.uniform(size=(m, n)), dtype=elem_type, order='F')

    # Workspace query happens up front and is not part of the timed call.
    query = ow.get_func('gesdd_lwork', variant)
    lwork, info = query(m, n)
    lwork = int(lwork)
    assert info == 0

    gesdd = ow.get_func('gesdd', variant)
    u, s, vt, info = benchmark(run_gesdd, a, lwork, gesdd)
    assert info == 0

    # Absolute tolerance depends on the precision of the variant.
    tolerances = {'s': 1e-5, 'd': 1e-13}
    np.testing.assert_allclose(u @ np.diag(s) @ vt, a, atol=tolerances[variant])
# linalg.eigh
syev_sizes = [50, 200]
def run_syev(a, lwork, func):
    """Call ``func(a, lwork=lwork, overwrite_a=True)`` and return its result."""
    return func(a, lwork=lwork, overwrite_a=True)
@pytest.mark.parametrize('variant', ['s', 'd'])
@pytest.mark.parametrize('n', syev_sizes)
def test_syev(benchmark, n, variant):
    """Benchmark the syev wrapper on a random symmetric ``n`` x ``n`` matrix."""
    rndm = np.random.RandomState(1234)
    dtyp = dtype_map[variant]

    # Symmetrize the random matrix by adding its transpose; store it Fortran-ordered.
    a = rndm.uniform(size=(n, n))
    a = np.asarray(a + a.T, dtype=dtyp, order='F')

    # Workspace query happens up front and is not part of the timed call.
    dsyev_lwork = ow.get_func('syev_lwork', variant)
    lwork, info = dsyev_lwork(n)
    lwork = int(lwork)
    assert info == 0

    syev = ow.get_func('syev', variant)
    w, v, info = benchmark(run_syev, a, lwork, syev)

    assert info == 0
    assert a is v  # overwrite_a=True

View File

@ -1,48 +0,0 @@
#
# Taken from SciPy (of course)
#
# Build definition for the `openblas-wrap` benchmark helper: a C/Fortran
# project that links against an OpenBLAS located through pkg-config.
project(
'openblas-wrap',
'c', 'fortran',
version: '0.1',
license: 'BSD-3',
meson_version: '>= 1.1.0',
default_options: [
'buildtype=debugoptimized',
'b_ndebug=if-release',
'c_std=c17',
'fortran_std=legacy',
],
)
# Python installation the extension is built for and installed into.
py3 = import('python').find_installation(pure: false)
py3_dep = py3.dependency()
cc = meson.get_compiler('c')
# Warning suppressions, limited to the flags the C compiler actually accepts.
_global_c_args = cc.get_supported_arguments(
'-Wno-unused-but-set-variable',
'-Wno-unused-function',
'-Wno-conversion',
'-Wno-misleading-indentation',
)
add_project_arguments(_global_c_args, language : 'c')
# We need -lm for all C code (assuming it uses math functions, which is safe to
# assume for SciPy). For C++ it isn't needed, because libstdc++/libc++ is
# guaranteed to depend on it. For Fortran code, Meson already adds `-lm`.
m_dep = cc.find_library('m', required : false)
if m_dep.found()
add_project_link_arguments('-lm', language : 'c')
endif
# Script located here for use by the build files in the subdirectory.
generate_f2pymod = find_program('openblas_wrap/generate_f2pymod.py')
# OpenBLAS itself is mandatory and is only searched for via pkg-config.
openblas = dependency('openblas', method: 'pkg-config', required: true)
openblas_dep = declare_dependency(
dependencies: openblas,
compile_args: []
)
subdir('openblas_wrap')

View File

@ -1,17 +0,0 @@
"""
Trampoline to hide the LAPACK details (scipy.linalg.lapack or scipy_openblas32 or...)
from benchmarking.
"""
__version__ = "0.1"
from . import _flapack
PREFIX = ''
def get_func(name, variant):
    """get_func('gesv', 'c') -> cgesv etc.

    The attribute looked up on ``_flapack`` is ``PREFIX + variant + name``;
    an unknown combination raises ``AttributeError``.
    """
    symbol = PREFIX + variant + name
    return getattr(_flapack, symbol)

View File

@ -1,417 +0,0 @@
!
! Taken from scipy/linalg
!
! Shorthand notations
!
! <tchar=s,d,cs,zd>
! <tchar2c=cs,zd>
!
! <prefix2=s,d>
! <prefix2c=c,z>
! <prefix3=s,sc>
! <prefix4=d,dz>
! <prefix6=s,d,c,z,c,z>
!
! <ftype2=real,double precision>
! <ftype2c=complex,double complex>
! <ftype3=real,complex>
! <ftype4=double precision,double complex>
! <ftypereal3=real,real>
! <ftypereal4=double precision,double precision>
! <ftype6=real,double precision,complex,double complex,\2,\3>
! <ftype6creal=real,double precision,complex,double complex,\0,\1>
!
! <ctype2=float,double>
! <ctype2c=complex_float,complex_double>
! <ctype3=float,complex_float>
! <ctype4=double,complex_double>
! <ctypereal3=float,float>
! <ctypereal4=double,double>
! <ctype6=float,double,complex_float,complex_double,\2,\3>
! <ctype6creal=float,double,complex_float,complex_double,\0,\1>
!
!
! Level 1 BLAS
!
python module _flapack
usercode '''
#define F_INT int
'''
interface
subroutine <prefix>axpy(n,a,x,offx,incx,y,offy,incy)
! Calculate z = a*x+y, where a is scalar.
!
! x and y are handed to the routine starting at element offsets offx and
! offy. y is declared intent(in,out) and is returned to Python as z.
callstatement (*f2py_func)(&n,&a,x+offx,&incx,y+offy,&incy)
callprotoargument F_INT*,<ctype>*,<ctype>*,F_INT*,<ctype>*,F_INT*
<ftype> dimension(*), intent(in) :: x
<ftype> dimension(*), intent(in,out,out=z) :: y
! The scalar defaults to one (real or complex, matching the routine type).
<ftype> optional, intent(in):: a=<1.0,\0,(1.0\,0.0),\2>
! Strides may be positive or negative, but not zero.
integer optional, intent(in),check(incx>0||incx<0) :: incx = 1
integer optional, intent(in),check(incy>0||incy<0) :: incy = 1
integer optional, intent(in),depend(x) :: offx=0
integer optional, intent(in),depend(y) :: offy=0
! Each offset must index an existing element of its vector.
check(offx>=0 && offx<len(x)) :: offx
check(offy>=0 && offy<len(y)) :: offy
! n defaults to the number of strided elements left in x after offx; the
! checks that follow reject an n that would run past the end of x or y.
integer optional, intent(in),depend(x,incx,offx,y,incy,offy) :: &
n = (len(x)-offx)/abs(incx)
check(len(x)-offx>(n-1)*abs(incx)) :: n
check(len(y)-offy>(n-1)*abs(incy)) :: n
end subroutine <prefix>axpy
function ddot(n,x,offx,incx,y,offy,incy) result (xy)
! Computes a vector-vector dot product.
!
! x and y are handed to the routine starting at element offsets offx and
! offy; the double precision result is returned as xy.
callstatement ddot_return_value = (*f2py_func)(&n,x+offx,&incx,y+offy,&incy)
callprotoargument F_INT*,double*,F_INT*,double*,F_INT*
intent(c) ddot
fortranname F_FUNC(ddot,DDOT)
double precision dimension(*), intent(in) :: x
double precision dimension(*), intent(in) :: y
double precision ddot,xy
! Strides may be positive or negative, but not zero.
integer optional, intent(in),check(incx>0||incx<0) :: incx = 1
integer optional, intent(in),check(incy>0||incy<0) :: incy = 1
integer optional, intent(in),depend(x) :: offx=0
integer optional, intent(in),depend(y) :: offy=0
! Each offset must index an existing element of its vector.
check(offx>=0 && offx<len(x)) :: offx
check(offy>=0 && offy<len(y)) :: offy
! n defaults to the number of strided elements left in x after offx; the
! checks that follow reject an n that would run past the end of x or y.
integer optional, intent(in),depend(x,incx,offx,y,incy,offy) :: &
n = (len(x)-offx)/abs(incx)
check(len(x)-offx>(n-1)*abs(incx)) :: n
check(len(y)-offy>(n-1)*abs(incy)) :: n
end function ddot
function <prefix4>nrm2(n,x,offx,incx) result(n2)
! Wraps the BLAS NRM2 routine and returns its value as n2.
! NOTE(review): by BLAS naming this is the Euclidean norm of the strided
! vector starting at x+offx - confirm against the BLAS reference.
<ftypereal4> <prefix4>nrm2, n2
callstatement <prefix4>nrm2_return_value = (*f2py_func)(&n,x+offx,&incx)
callprotoargument F_INT*,<ctype4>*,F_INT*
intent(c) <prefix4>nrm2
fortranname F_FUNC(<prefix4>nrm2,<D,DZ>NRM2)
<ftype4> dimension(*),intent(in) :: x
! Unlike axpy and ddot in this file, only strictly positive strides are accepted.
integer optional, intent(in),check(incx>0) :: incx = 1
integer optional,intent(in),depend(x) :: offx=0
check(offx>=0 && offx<len(x)) :: offx
! n defaults to the number of elements reachable from offx with stride incx.
integer optional,intent(in),depend(x,incx,offx) :: n = (len(x)-offx)/abs(incx)
check(len(x)-offx>(n-1)*abs(incx)) :: n
end function <prefix4>nrm2
!
! Level 2 BLAS
!
subroutine <prefix>gemv(m,n,alpha,a,x,beta,y,offx,incx,offy,incy,trans,rows,cols,ly)
! Computes a matrix-vector product using a general matrix
!
! y = gemv(alpha,a,x,beta=0,y=0,offx=0,incx=1,offy=0,incy=1,trans=0)
! Calculate y <- alpha * op(A) * x + beta * y
!
! trans selects op(A): 0 passes "N", 1 passes "T", 2 passes "C".
callstatement (*f2py_func)((trans?(trans==2?"C":"T"):"N"),&m,&n,&alpha,a,&m, &
x+offx,&incx,&beta,y+offy,&incy)
callprotoargument char*,F_INT*,F_INT*,<ctype>*,<ctype>*,F_INT*,<ctype>*,F_INT*,<ctype>*, &
<ctype>*,F_INT*
integer optional, intent(in), check(trans>=0 && trans <=2) :: trans = 0
! Strides may be negative but never zero.
integer optional, intent(in), check(incx>0||incx<0) :: incx = 1
integer optional, intent(in), check(incy>0||incy<0) :: incy = 1
<ftype> intent(in) :: alpha
<ftype> intent(in), optional :: beta = <0.0,\0,(0.0\,0.0),\2>
<ftype> dimension(*), intent(in) :: x
<ftype> dimension(ly), intent(in,copy,out), depend(ly),optional :: y
! When y is not supplied, ly is the smallest length that holds `rows` strided
! elements after offy; otherwise ly is -1.
! NOTE(review): -1 presumably lets f2py take the length from the supplied array - confirm.
integer intent(hide), depend(incy,rows,offy) :: ly = &
(y_capi==Py_None?1+offy+(rows-1)*abs(incy):-1)
<ftype> dimension(m,n), intent(in) :: a
integer depend(a), intent(hide):: m = shape(a,0)
integer depend(a), intent(hide):: n = shape(a,1)
integer optional, intent(in) :: offx=0
integer optional, intent(in) :: offy=0
! x must hold `cols` strided elements starting at offx.
check(offx>=0 && offx<len(x)) :: x
check(len(x)>offx+(cols-1)*abs(incx)) :: x
depend(offx,cols,incx) :: x
! y must hold `rows` strided elements starting at offy.
check(offy>=0 && offy<len(y)) :: y
check(len(y)>offy+(rows-1)*abs(incy)) :: y
depend(offy,rows,incy) :: y
! Lengths of the result (rows) and of the input vector (cols); swapped when trans is nonzero.
integer depend(m,n,trans), intent(hide) :: rows = (trans?n:m)
integer depend(m,n,trans), intent(hide) :: cols = (trans?m:n)
end subroutine <prefix>gemv
subroutine <prefix>gbmv(m,n,kl,ku,alpha,a,lda,x,incx,offx,beta,y,incy,offy,trans,ly)
! Performs one of the matrix-vector operations
!
! y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,
! or y := alpha*A**H*x + beta*y,
!
! where alpha and beta are scalars, x and y are vectors and A is an
! m by n band matrix, with kl sub-diagonals and ku super-diagonals.
!
! trans selects the operation: 0 passes "N", 1 passes "T", 2 passes "C".
! The result is returned under the name yout.
callstatement (*f2py_func)((trans?(trans==2?"C":"T"):"N"),&m,&n,&kl,&ku,&alpha,a,&lda,x+offx,&incx,&beta,y+offy,&incy)
callprotoargument char*,F_INT*,F_INT*,F_INT*,F_INT*,<ctype>*,<ctype>*,F_INT*,<ctype>*,F_INT*,<ctype>*,<ctype>*,F_INT*
integer optional,intent(in),check(trans>=0 && trans <=2) :: trans = 0
integer intent(in), depend(ku,kl),check(m>=ku+kl+1) :: m
! n must match the number of columns of the band storage array a.
integer intent(in),check(n>=0&&n==shape(a,1)),depend(a) :: n
integer intent(in),check(kl>=0) :: kl
integer intent(in),check(ku>=0) :: ku
integer intent(hide),depend(a) :: lda = MAX(shape(a,0),1)
! Strides may be negative but never zero.
integer optional, intent(in),check(incx>0||incx<0) :: incx = 1
integer optional, intent(in),check(incy>0||incy<0) :: incy = 1
! When y is not supplied, ly is the smallest length that holds the result
! (m elements for trans==0, n otherwise) after offy; otherwise ly is -1.
integer intent(hide),depend(m,n,incy,offy,trans) :: ly = &
(y_capi==Py_None?1+offy+(trans==0?m-1:n-1)*abs(incy):-1)
integer optional, intent(in) :: offx=0
integer optional, intent(in) :: offy=0
<ftype> intent(in) :: alpha
<ftype> intent(in),optional :: beta = <0.0,\0,(0.0\,0.0),\2>
<ftype> dimension(lda,n),intent(in) :: a
<ftype> dimension(ly), intent(in,out,copy,out=yout),depend(ly),optional :: y
! y must hold the strided result starting at offy.
check(offy>=0 && offy<len(y)) :: y
check(len(y)>offy+(trans==0?m-1:n-1)*abs(incy)) :: y
depend(offy,n,incy) :: y
<ftype> dimension(*), intent(in) :: x
! x must hold the strided input (n elements for trans==0, m otherwise) starting at offx.
check(offx>=0 && offx<len(x)) :: x
check(len(x)>offx+(trans==0?n-1:m-1)*abs(incx)) :: x
depend(offx,n,incx) :: x
end subroutine <prefix>gbmv
!
! Level 3 BLAS
!
subroutine <prefix>gemm(m,n,k,alpha,a,b,beta,c,trans_a,trans_b,lda,ka,ldb,kb)
! Computes a scalar-matrix-matrix product and adds the result to a
! scalar-matrix product.
!
! c = gemm(alpha,a,b,beta=0,c=0,trans_a=0,trans_b=0,overwrite_c=0)
! Calculate C <- alpha * op(A) * op(B) + beta * C
!
! trans_a and trans_b each select op(): 0 passes "N", 1 passes "T", 2 passes "C".
! The leading dimension passed for c is m.
callstatement (*f2py_func)((trans_a?(trans_a==2?"C":"T"):"N"), &
(trans_b?(trans_b==2?"C":"T"):"N"),&m,&n,&k,&alpha,a,&lda,b,&ldb,&beta,c,&m)
callprotoargument char*,char*,F_INT*,F_INT*,F_INT*,<ctype>*,<ctype>*,F_INT*,<ctype>*, &
F_INT*,<ctype>*,<ctype>*,F_INT*
integer optional,intent(in),check(trans_a>=0 && trans_a <=2) :: trans_a = 0
integer optional,intent(in),check(trans_b>=0 && trans_b <=2) :: trans_b = 0
<ftype> intent(in) :: alpha
<ftype> intent(in),optional :: beta = <0.0,\0,(0.0\,0.0),\2>
<ftype> dimension(lda,ka),intent(in) :: a
<ftype> dimension(ldb,kb),intent(in) :: b
<ftype> dimension(m,n),intent(in,out,copy),depend(m,n),optional :: c
check(shape(c,0)==m && shape(c,1)==n) :: c
! Storage shapes of a and b as passed in.
integer depend(a),intent(hide) :: lda = shape(a,0)
integer depend(a),intent(hide) :: ka = shape(a,1)
integer depend(b),intent(hide) :: ldb = shape(b,0)
integer depend(b),intent(hide) :: kb = shape(b,1)
! Logical shape of op(a) is m x k; the two storage extents swap when trans_a is nonzero.
integer depend(a,trans_a,ka,lda),intent(hide):: m = (trans_a?ka:lda)
integer depend(a,trans_a,ka,lda),intent(hide):: k = (trans_a?lda:ka)
! n is the column count of op(b); the check requires the row count of op(b) to equal k.
integer depend(b,trans_b,kb,ldb,k),intent(hide),check(trans_b?kb==k:ldb==k) :: &
n = (trans_b?ldb:kb)
end subroutine <prefix>gemm
subroutine <prefix6><sy,\0,\0,\0,he,he>rk(n,k,alpha,a,beta,c,trans,lower,lda,ka)
! performs one of the symmetric rank k operations
! C := alpha*A*A**T + beta*C, or C := alpha*A**T*A + beta*C,
!
! c = syrk(alpha,a,beta=0,c=0,trans=0,lower=0,overwrite_c=0)
!
! The name template yields six replicates: the first four are named ...syrk
! and the last two ...herk.
! NOTE(review): the herk replicates presumably perform the Hermitian variant
! (A**H in place of A**T) - confirm against the BLAS reference.
! lower passes "L" when nonzero and "U" otherwise; trans passes "N", "T" or "C"
! for 0, 1 or 2. The leading dimension passed for c is n.
callstatement (*f2py_func)((lower?"L":"U"), &
(trans?(trans==2?"C":"T"):"N"), &n,&k,&alpha,a,&lda,&beta,c,&n)
callprotoargument char*,char*,F_INT*,F_INT*,<ctype6>*,<ctype6>*,F_INT*,<ctype6>*, &
<ctype6>*,F_INT*
integer optional, intent(in),check(lower==0||lower==1) :: lower = 0
integer optional,intent(in),check(trans>=0 && trans <=2) :: trans = 0
<ftype6> intent(in) :: alpha
<ftype6> intent(in),optional :: beta = <0.0,\0,(0.0\,0.0),\2,\2,\2>
<ftype6> dimension(lda,ka),intent(in) :: a
<ftype6> dimension(n,n),intent(in,out,copy),depend(n),optional :: c
check(shape(c,0)==n && shape(c,1)==n) :: c
! Storage shape of a as passed in.
integer depend(a),intent(hide) :: lda = shape(a,0)
integer depend(a),intent(hide) :: ka = shape(a,1)
! Order of c (n) and rank of the update (k); the two extents of a swap when trans is nonzero.
integer depend(a, trans, ka, lda), intent(hide) :: n = (trans ? ka : lda)
integer depend(a, trans, ka, lda), intent(hide) :: k = (trans ? lda : ka)
end subroutine <prefix6><sy,\0,\0,\0,he,he>rk
!
! LAPACK
!
subroutine <prefix>gesv(n,nrhs,a,piv,b,info)
! lu,piv,x,info = gesv(a,b,overwrite_a=0,overwrite_b=0)
! Solve A * X = B.
! A = P * L * U
! U is upper triangular, L is unit lower triangular,
! piv pivots columns.
!
! After the call every entry of piv is decremented by one.
! NOTE(review): presumably this converts 1-based pivot indices to 0-based - confirm.
! The backslash before the less-than sign in the loop escapes it from the
! template preprocessor.
callstatement {F_INT i;(*f2py_func)(&n,&nrhs,a,&n,piv,b,&n,&info);for(i=0;i\<n;--piv[i++]);}
callprotoargument F_INT*,F_INT*,<ctype>*,F_INT*,F_INT*,<ctype>*,F_INT*,F_INT*
integer depend(a),intent(hide):: n = shape(a,0)
integer depend(b),intent(hide):: nrhs = shape(b,1)
! a must be square.
<ftype> dimension(n,n),check(shape(a,0)==shape(a,1)) :: a
integer dimension(n),depend(n),intent(out) :: piv
! b must have as many rows as a.
<ftype> dimension(n,nrhs),check(shape(a,0)==shape(b,0)),depend(n) :: b
integer intent(out)::info
! Both inputs are overwritten by the call and returned: b as x, a as lu.
intent(in,out,copy,out=x) b
intent(in,out,copy,out=lu) a
end subroutine <prefix>gesv
subroutine <prefix2>gesdd(m,n,minmn,u0,u1,vt0,vt1,a,compute_uv,full_matrices,u,s,vt,work,lwork,iwork,info)
! u,s,vt,info = gesdd(a,compute_uv=1,full_matrices=1,lwork=..,overwrite_a=0)
! Compute the singular value decomposition (SVD) using divide and conquer:
! A = U * SIGMA * transpose(V)
! A - M x N matrix
! U - M x M matrix or M x min(M,N) if full_matrices=False
! SIGMA - M x N zero matrix with a main diagonal filled with min(M,N)
! singular values
! transpose(V) - N x N matrix or min(M,N) x N if full_matrices=False
!
! The job character passed is "N" when compute_uv is 0, otherwise "A" for
! full matrices and "S" for the reduced ones. With compute_uv=0 both u and
! vt are returned as 1 x 1 arrays.
callstatement (*f2py_func)((compute_uv?(full_matrices?"A":"S"):"N"),&m,&n,a,&m,s,u,&u0,vt,&vt0,work,&lwork,iwork,&info)
callprotoargument char*,F_INT*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,<ctype2>*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,F_INT*,F_INT*,F_INT*
integer intent(in),optional,check(compute_uv==0||compute_uv==1):: compute_uv = 1
integer intent(in),optional,check(full_matrices==0||full_matrices==1):: full_matrices = 1
integer intent(hide),depend(a):: m = shape(a,0)
integer intent(hide),depend(a):: n = shape(a,1)
integer intent(hide),depend(m,n):: minmn = MIN(m,n)
! Output extents: u is u0 x u1 and vt is vt0 x vt1.
integer intent(hide),depend(compute_uv,minmn) :: u0 = (compute_uv?m:1)
integer intent(hide),depend(compute_uv,minmn, full_matrices) :: u1 = (compute_uv?(full_matrices?m:minmn):1)
integer intent(hide),depend(compute_uv,minmn, full_matrices) :: vt0 = (compute_uv?(full_matrices?n:minmn):1)
integer intent(hide),depend(compute_uv,minmn) :: vt1 = (compute_uv?n:1)
<ftype2> dimension(m,n),intent(in,copy,aligned8) :: a
<ftype2> dimension(minmn),intent(out),depend(minmn) :: s
<ftype2> dimension(u0,u1),intent(out),depend(u0, u1) :: u
<ftype2> dimension(vt0,vt1),intent(out),depend(vt0, vt1) :: vt
<ftype2> dimension(lwork),intent(hide,cache),depend(lwork) :: work
! Default workspace size; it is larger when the singular vectors are requested.
integer optional,intent(in),depend(minmn,compute_uv) &
:: lwork = max((compute_uv?4*minmn*minmn+MAX(m,n)+9*minmn:MAX(14*minmn+4,10*minmn+2+25*(25+8))+MAX(m,n)),1)
integer intent(hide,cache),dimension(8*minmn),depend(minmn) :: iwork
integer intent(out)::info
end subroutine <prefix2>gesdd
subroutine <prefix2>gesdd_lwork(m,n,minmn,u0,vt0,a,compute_uv,full_matrices,u,s,vt,work,lwork,iwork,info)
! LWORK computation for (S/D)GESDD
!
! Calls the same routine as gesdd (see fortranname) with lwork fixed at -1.
! Only the sizes m and n are taken from the caller; a, s, u, vt and iwork
! are hidden scalars passed by address, and work is returned as a scalar.
! NOTE(review): lwork=-1 is presumably the LAPACK workspace-query convention,
! with the optimal size reported in work - confirm against the LAPACK docs.
fortranname <prefix2>gesdd
callstatement (*f2py_func)((compute_uv?(full_matrices?"A":"S"):"N"),&m,&n,&a,&m,&s,&u,&u0,&vt,&vt0,&work,&lwork,&iwork,&info)
callprotoargument char*,F_INT*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,<ctype2>*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,F_INT*,F_INT*,F_INT*
integer intent(in),optional,check(compute_uv==0||compute_uv==1):: compute_uv = 1
integer intent(in),optional,check(full_matrices==0||full_matrices==1):: full_matrices = 1
integer intent(in) :: m
integer intent(in) :: n
integer intent(hide),depend(m,n):: minmn = MIN(m,n)
! Leading dimensions of u and vt, computed as in gesdd.
integer intent(hide),depend(compute_uv,minmn) :: u0 = (compute_uv?m:1)
integer intent(hide),depend(compute_uv,minmn, full_matrices) :: vt0 = (compute_uv?(full_matrices?n:minmn):1)
<ftype2> intent(hide) :: a
<ftype2> intent(hide) :: s
<ftype2> intent(hide) :: u
<ftype2> intent(hide) :: vt
<ftype2> intent(out) :: work
integer intent(hide) :: lwork = -1
integer intent(hide) :: iwork
integer intent(out) :: info
end subroutine <prefix2>gesdd_lwork
subroutine <prefix2>syev(compute_v,lower,n,w,a,lda,work,lwork,info)
! w,v,info = syev(a,compute_v=1,lower=0,lwork=3*n-1,overwrite_a=0)
! Compute all eigenvalues and, optionally, eigenvectors of a
! real symmetric matrix A.
!
! Performance tip:
! If compute_v=0 then set also overwrite_a=1.
!
! compute_v passes "V" when nonzero and "N" otherwise; lower passes "L" when
! nonzero and "U" otherwise. The array a is overwritten by the call and
! returned under the name v.
callstatement (*f2py_func)((compute_v?"V":"N"),(lower?"L":"U"),&n,a,&lda,w,work,&lwork,&info)
callprotoargument char*,char*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,<ctype2>*,F_INT*,F_INT*
integer optional,intent(in):: compute_v = 1
check(compute_v==1||compute_v==0) compute_v
integer optional,intent(in),check(lower==0||lower==1) :: lower = 0
integer intent(hide),depend(a):: n = shape(a,0)
integer intent(hide),depend(a):: lda = MAX(1,shape(a,0))
! a must be square.
<ftype2> dimension(n,n),check(shape(a,0)==shape(a,1)) :: a
intent(in,copy,out,out=v) :: a
<ftype2> dimension(n),intent(out),depend(n) :: w
! Workspace length: defaults to max(3*n-1,1) and must be at least 3*n-1.
integer optional,intent(in),depend(n) :: lwork=max(3*n-1,1)
check(lwork>=3*n-1) :: lwork
<ftype2> dimension(lwork),intent(hide),depend(lwork) :: work
integer intent(out) :: info
end subroutine <prefix2>syev
subroutine <prefix2>syev_lwork(lower,n,w,a,lda,work,lwork,info)
! LWORK routines for syev
!
! Calls the same routine as syev (see fortranname) with job "N" and lwork
! fixed at -1. Only n and lower come from the caller; a and w are hidden
! scalars passed by address, and work is returned as a scalar.
! NOTE(review): lwork=-1 is presumably the LAPACK workspace-query convention,
! with the optimal size reported in work - confirm against the LAPACK docs.
fortranname <prefix2>syev
callstatement (*f2py_func)("N",(lower?"L":"U"),&n,&a,&lda,&w,&work,&lwork,&info)
callprotoargument char*,char*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,<ctype2>*,F_INT*,F_INT*
integer intent(in):: n
integer optional,intent(in),check(lower==0||lower==1) :: lower = 0
integer intent(hide),depend(n):: lda = MAX(1, n)
<ftype2> intent(hide):: a
<ftype2> intent(hide):: w
integer intent(hide):: lwork = -1
<ftype2> intent(out):: work
integer intent(out):: info
end subroutine <prefix2>syev_lwork
end interface
end python module _flapack

View File

@ -1,299 +0,0 @@
#!/usr/bin/env python3
"""
Process f2py template files (`filename.pyf.src` -> `filename.pyf`)
Usage: python generate_pyf.py filename.pyf.src -o <output directory>
"""
import os
import sys
import re
import subprocess
import argparse
# START OF CODE VENDORED FROM `numpy.distutils.from_template`
#############################################################
"""
process_file(filename)
takes templated file .xxx.src and produces .xxx file where .xxx
is .pyf .f90 or .f using the following template rules:
'<..>' denotes a template.
All function and subroutine blocks in a source file with names that
contain '<..>' will be replicated according to the rules in '<..>'.
The number of comma-separated words in '<..>' will determine the number of
replicates.
'<..>' may have two different forms, named and short. For example,
named:
<p=d,s,z,c> where anywhere inside a block '<p>' will be replaced with
'd', 's', 'z', and 'c' for each replicate of the block.
<_c> is already defined: <_c=s,d,c,z>
<_t> is already defined: <_t=real,double precision,complex,double complex>
short:
<s,d,c,z>, a short form of the named, useful when no <p> appears inside
a block.
In general, '<..>' contains a comma separated list of arbitrary
expressions. If these expression must contain a comma|leftarrow|rightarrow,
then prepend the comma|leftarrow|rightarrow with a backslash.
If an expression matches '\\<index>' then it will be replaced
by <index>-th expression.
Note that all '<..>' forms in a block must have the same number of
comma-separated entries.
Predefined named template rules:
<prefix=s,d,c,z>
<ftype=real,double precision,complex,double complex>
<ftypereal=real,double precision,\\0,\\1>
<ctype=float,double,complex_float,complex_double>
<ctypereal=float,double,\\0,\\1>
"""
# A routine starts on a line whose first token is `subroutine` or `function`.
# In fixed-form Fortran the keyword may instead sit on a continuation line,
# i.e. five blanks followed by a `$` or `*` marker in column six.
routine_start_re = re.compile(
    r'(\n|\A)((     (\$|\*))|)\s*(subroutine|function)\b',
    re.I
)
routine_end_re = re.compile(r'\n\s*end\s*(subroutine|function)\b.*(\n|\Z)', re.I)
# `function` keyword located on a fixed-form continuation line.
function_start_re = re.compile(r'\n     (\$|\*)\s*function\b', re.I)


def parse_structure(astr):
    """ Return a list of tuples for each function or subroutine each
    tuple is the start and end of a subroutine or function to be
    expanded.

    Parameters
    ----------
    astr : str
        Full text of the template source.

    Returns
    -------
    list of (int, int)
        Half-open ``(start, end)`` index pairs into `astr`, in file order.
        A routine without a matching ``end subroutine``/``end function``
        line extends to the end of the text.
    """
    spanlist = []
    ind = 0
    while True:
        m = routine_start_re.search(astr, ind)
        if m is None:
            break
        start = m.start()
        if function_start_re.match(astr, start, m.end()):
            # The keyword is on a continuation line: walk back over the
            # preceding continuation lines so the span also covers the
            # statement's first line (e.g. the return type).
            while True:
                i = astr.rfind('\n', ind, start)
                if i==-1:
                    break
                start = i
                # newline + five blanks + '$' is exactly seven characters
                if astr[i:i+7]!='\n     $':
                    break
        # skip the newline that the start pattern matched
        start += 1
        m = routine_end_re.search(astr, m.end())
        # leave the trailing newline of the `end` line outside the span
        ind = end = m and m.end()-1 or len(astr)
        spanlist.append((start, end))
    return spanlist
template_re = re.compile(r"<\s*(\w[\w\d]*)\s*>")
named_re = re.compile(r"<\s*(\w[\w\d]*)\s*=\s*(.*?)\s*>")
list_re = re.compile(r"<\s*((.*?))\s*>")


def find_repl_patterns(astr):
    """Collect every named template definition found in `astr`.

    Returns a dict mapping each template name to its comma-joined list of
    replacements, with escaped commas protected as ``@comma@`` and index
    back-references already resolved by `conv`.
    """
    found = {}
    for raw_name, raw_list in named_re.findall(astr):
        key = raw_name.strip() or unique_key(found)
        found[key] = conv(raw_list.replace(r'\,', '@comma@'))
    return found
def find_and_remove_repl_patterns(astr):
    """Return `astr` with all named definitions deleted, plus those definitions.

    The definitions are gathered from the unmodified text first and are
    returned as the second element of the tuple.
    """
    names = find_repl_patterns(astr)
    stripped = named_re.sub('', astr)
    return stripped, names
item_re = re.compile(r"\A\\(?P<index>\d+)\Z")


def conv(astr):
    """Normalise a comma-separated replacement list.

    Every entry is stripped of surrounding whitespace. An entry of the form
    backslash + digits is replaced by the entry at that index, reading the
    list as it stands at that moment (earlier entries are already resolved).
    The entries are returned joined by commas.
    """
    items = [piece.strip() for piece in astr.split(',')]
    for pos, item in enumerate(items):
        backref = item_re.match(item)
        if backref is not None:
            items[pos] = items[int(backref.group('index'))]
    return ','.join(items)
def unique_key(adict):
    """Return the first name of the form ``__l1``, ``__l2``, ... that is not a key of `adict`."""
    taken = list(adict.keys())
    n = 1
    candidate = '__l%s' % (n)
    while candidate in taken:
        n += 1
        candidate = '__l%s' % (n)
    return candidate
# Matches a string consisting of a single identifier (optionally padded with
# whitespace), i.e. a reference to a named template rather than a list.
template_name_re = re.compile(r'\A\s*(\w[\w\d]*)\s*\Z')


def expand_sub(substr, names):
    """Expand one routine block into its template replicates.

    Parameters
    ----------
    substr : str
        Text of a single function/subroutine block.
    names : dict
        Known named templates (name -> comma-joined replacement list).
        Mutated in place: templates defined inside this block are added to
        it unless their replacement list starts with an underscore.

    Returns
    -------
    str
        The block repeated once per replicate, each copy followed by two
        newlines; or `substr` (after definition/list normalisation) when it
        uses no templates.

    Raises
    ------
    ValueError
        If a referenced template name has no definition.
    """
    # Protect escaped angle brackets so they are not read as template
    # delimiters; they are restored just before returning.
    substr = substr.replace(r'\>', '@rightarrow@')
    substr = substr.replace(r'\<', '@leftarrow@')
    # Named definitions that appear inside this block.
    lnames = find_repl_patterns(substr)
    substr = named_re.sub(r"<\1>", substr)  # get rid of definition templates

    def listrepl(mobj):
        # Turn a short-form list into a reference to a named template,
        # reusing an existing local name when the same list was seen before.
        thelist = conv(mobj.group(1).replace(r'\,', '@comma@'))
        if template_name_re.match(thelist):
            return "<%s>" % (thelist)
        name = None
        for key in lnames.keys():  # see if list is already in dictionary
            if lnames[key] == thelist:
                name = key
        if name is None:  # this list is not in the dictionary yet
            name = unique_key(lnames)
            lnames[name] = thelist
        return "<%s>" % name

    substr = list_re.sub(listrepl, substr)  # convert all lists to named templates
    # newnames are constructed as needed
    numsubs = None
    base_rule = None
    rules = {}
    for r in template_re.findall(substr):
        if r not in rules:
            # Block-local definitions take precedence over the shared ones.
            thelist = lnames.get(r, names.get(r, None))
            if thelist is None:
                raise ValueError('No replicates found for <%s>' % (r))
            if r not in names and not thelist.startswith('_'):
                names[r] = thelist
            rule = [i.replace('@comma@', ',') for i in thelist.split(',')]
            num = len(rule)
            if numsubs is None:
                # The first template encountered fixes the replicate count.
                numsubs = num
                rules[r] = rule
                base_rule = r
            elif num == numsubs:
                rules[r] = rule
            else:
                # A template with a different number of entries is reported
                # and gets no rule.
                print("Mismatch in number of replacements (base <{}={}>) "
                      "for <{}={}>. Ignoring."
                      .format(base_rule, ','.join(rules[base_rule]), r, thelist))
    if not rules:
        return substr

    def namerepl(mobj):
        # `k` is the replicate index of the enclosing loop below. A name
        # without a rule is replaced by the bare name (brackets dropped).
        name = mobj.group(1)
        return rules.get(name, (k+1)*[name])[k]

    newstr = ''
    for k in range(numsubs):
        newstr += template_re.sub(namerepl, substr) + '\n\n'
    newstr = newstr.replace('@rightarrow@', '>')
    newstr = newstr.replace('@leftarrow@', '<')
    return newstr
def process_str(allstr):
    """Expand every templated routine in `allstr` and return the new text.

    Text between routines is copied through with its named template
    definitions removed; those definitions (on top of the predefined
    ``_special_names``) are made available to all following routines.
    """
    names = {}
    names.update(_special_names)

    pieces = []
    cursor = 0
    for begin, end in parse_structure(allstr):
        preamble, defs = find_and_remove_repl_patterns(allstr[cursor:begin])
        pieces.append(preamble)
        names.update(defs)
        pieces.append(expand_sub(allstr[begin:end], names))
        cursor = end
    pieces.append(allstr[cursor:])
    return ''.join(pieces)
include_src_re = re.compile(
    r"(\n|\A)\s*include\s*['\"](?P<name>[\w\d./\\]+\.src)['\"]",
    re.I
)


def resolve_includes(source):
    """Read `source` and return its lines with ``include '....src'`` lines expanded.

    Relative include names are resolved against the directory of `source`.
    An included file is expanded recursively; an include line whose target
    is not an existing file is kept unchanged.
    """
    base_dir = os.path.dirname(source)
    collected = []
    with open(source) as fid:
        for line in fid:
            hit = include_src_re.match(line)
            if hit is None:
                collected.append(line)
                continue
            target = hit.group('name')
            if not os.path.isabs(target):
                target = os.path.join(base_dir, target)
            if os.path.isfile(target):
                collected.extend(resolve_includes(target))
            else:
                collected.append(line)
    return collected
def process_file(source):
    """Return the fully expanded text of the template file `source`."""
    expanded = ''.join(resolve_includes(source))
    return process_str(expanded)
# Predefined named templates. process_str() seeds its name table with these
# before reading any definitions from the file being processed. The doubled
# backslashes yield single-backslash index references (e.g. \0) once the
# string literal is evaluated.
_special_names = find_repl_patterns('''
<_c=s,d,c,z>
<_t=real,double precision,complex,double complex>
<prefix=s,d,c,z>
<ftype=real,double precision,complex,double complex>
<ctype=float,double,complex_float,complex_double>
<ftypereal=real,double precision,\\0,\\1>
<ctypereal=float,double,\\0,\\1>
''')
# END OF CODE VENDORED FROM `numpy.distutils.from_template`
###########################################################
def main():
    """Command-line entry point.

    Expands a ``.pyf.src``/``.f.src`` template into the output directory
    and, for ``.pyf.src``/``.pyf`` inputs, runs ``numpy.f2py`` on the
    signature file to generate the C API module source.

    Raises
    ------
    ValueError
        If the input file does not have a supported extension.
    RuntimeError
        If the f2py subprocess exits with a nonzero status; the message
        carries f2py's captured stdout and stderr.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("infile", type=str,
                        help="Path to the input file")
    parser.add_argument("-o", "--outdir", type=str,
                        help="Path to the output directory")
    args = parser.parse_args()

    if not args.infile.endswith(('.pyf', '.pyf.src', '.f.src')):
        raise ValueError(f"Input file has unknown extension: {args.infile}")

    outdir_abs = os.path.join(os.getcwd(), args.outdir)

    # Write out the .pyf/.f file
    if args.infile.endswith(('.pyf.src', '.f.src')):
        code = process_file(args.infile)
        # Output name is the input's base name with the trailing `.src` removed.
        fname_pyf = os.path.join(args.outdir,
                                 os.path.splitext(os.path.split(args.infile)[1])[0])
        with open(fname_pyf, 'w') as f:
            f.write(code)
    else:
        fname_pyf = args.infile

    # Now invoke f2py to generate the C API module file
    if args.infile.endswith(('.pyf.src', '.pyf')):
        p = subprocess.Popen([sys.executable, '-m', 'numpy.f2py', fname_pyf,
                              '--build-dir', outdir_abs],  # '--quiet'],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             cwd=os.getcwd())
        out, err = p.communicate()
        if not (p.returncode == 0):
            # The pipes are opened in binary mode, so decode the captured
            # output for a readable message.
            raise RuntimeError(f"Processing {fname_pyf} with f2py failed!\n"
                               f"{out.decode(errors='replace')}\n"
                               f"{err.decode(errors='replace')}")


if __name__ == "__main__":
    main()

View File

@ -1,50 +0,0 @@
# Ask the Python interpreter where the NumPy and f2py headers live.
# NOTE(review): the chdir("..") before the import presumably keeps the
# current directory from shadowing the installed numpy - confirm.
inc_numpy = run_command(
  py3,
  ['-c', 'import os; os.chdir(".."); import numpy; print(numpy.get_include())'],
  check: true,
).stdout().strip()

inc_f2py = run_command(
  py3,
  ['-c', 'import os; os.chdir(".."); import numpy.f2py; print(numpy.f2py.get_include())'],
  check: true,
).stdout().strip()

inc_np = include_directories(inc_numpy, inc_f2py)

# f2py's runtime support source, compiled once into a static helper library
fortranobject_c = inc_f2py / 'fortranobject.c'

fortranobject_lib = static_library(
  '_fortranobject',
  fortranobject_c,
  # c_args: numpy_nodepr_api,
  include_directories: [inc_np, inc_f2py],
  dependencies: py3_dep,
  gnu_symbol_visibility: 'hidden',
)

fortranobject_dep = declare_dependency(
  include_directories: [inc_np, inc_f2py],
  link_with: fortranobject_lib,
)

# f2py generated wrappers
flapack_module = custom_target(
  'flapack_module',
  input: 'blas_lapack.pyf.src',
  output: ['_flapackmodule.c'],
  command: [generate_f2pymod, '@INPUT@', '-o', '@OUTDIR@'],
)

# Extension module built from the generated wrapper source
py3.extension_module(
  '_flapack',
  flapack_module,
  dependencies: [openblas_dep, fortranobject_dep],
  link_args: [], # version_link_args,
  subdir: 'openblas_wrap',
  install: true,
)

py3.install_sources(
  ['__init__.py'],
  subdir: 'openblas_wrap',
)

View File

@ -1,12 +0,0 @@
# pkg-config description of an OpenBLAS build.
# NOTE(review): libdir and includedir are absolute paths into one developer's
# checkout (/home/br/repos/OpenBLAS/); this looks like a locally generated
# file - confirm whether it is meant to be committed as-is.
libdir=/home/br/repos/OpenBLAS/
includedir=/home/br/repos/OpenBLAS/
# Build configuration string and library version recorded for this build.
openblas_config= OpenBLAS 0.3.27 DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=64
version=0.3.27
# Extra libraries, exposed below through Libs.private.
extralib=-lm -lpthread -lgfortran -lquadmath -L${libdir} -lopenblas
Name: openblas
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
Version: ${version}
URL: https://github.com/xianyi/OpenBLAS
Libs: -L${libdir} -lopenblas
Libs.private: ${extralib}
Cflags: -I${includedir}

View File

@ -25,11 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef ROT
#ifndef COMPLEX
#undef DOT
#ifdef DOUBLE
#define ROT BLASFUNC(drot)
@ -37,14 +42,70 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ROT BLASFUNC(srot)
#endif
#else
#ifdef DOUBLE
#define ROT BLASFUNC(zdrot)
#else
#define ROT BLASFUNC(csrot)
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Stand-in for gettimeofday() on Windows builds.
 *
 * Fills *tv from GetSystemTimeAsFileTime(): the 64-bit file time is divided
 * by 10 to obtain microseconds, shifted by DELTA_EPOCH_IN_MICROSECS to the
 * Unix epoch, and split into seconds and microseconds. A NULL tv is
 * accepted and left untouched. The tz argument is ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){
  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz; /* time zone information is not reported */

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);
      /* assemble the 64-bit value from its two 32-bit halves */
      tmpres |= ft.dwHighDateTime;
      tmpres <<= 32;
      tmpres |= ft.dwLowDateTime;
      /*converting file time to unix epoch*/
      tmpres /= 10; /*convert into microseconds*/
      tmpres -= DELTA_EPOCH_IN_MICROSECS;
      tv->tv_sec = (long)(tmpres / 1000000UL);
      tv->tv_usec = (long)(tmpres % 1000000UL);
    }
  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate `size` bytes from a private System V shared-memory segment
 * created with SHM_HUGETLB. The requested length is size + HUGE_PAGESIZE
 * masked down to a multiple of HUGE_PAGESIZE. The segment is marked for
 * removal (IPC_RMID) right after it is attached. Prints a message and
 * exits with status 1 if shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){
  void *address;
  int shmid;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);
  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
@ -63,6 +124,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
@ -85,7 +147,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -98,31 +160,32 @@ int main(int argc, char *argv[]){
fprintf(stderr, " %6d : ", (int)m);
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (l=0; l<loops; l++)
{
begin();
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
ROT (&m, x, &inc_x, y, &inc_y, c, s);
end();
gettimeofday( &stop, (struct timezone *)0);
time1 = getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
timeg += time1;
}
}
timeg /= loops;
timeg /= loops;
fprintf(stderr,
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);

View File

@ -1,138 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#undef ROTM
#ifdef DOUBLE
#define ROTM BLASFUNC(drotm)
#else
#define ROTM BLASFUNC(srotm)
#endif
/*
 * ROTM benchmark driver.
 *
 * Usage: prog [from [to [step]]]
 * Environment: OPENBLAS_LOOPS, OPENBLAS_INCX, OPENBLAS_INCY.
 *
 * For every vector length m in from..to (in increments of step) the x and y
 * vectors are filled with random values, ROTM is called `loops` times, and
 * the average time per call plus the derived MFlops figure are printed to
 * stderr. Returns 0 on completion; exits with status 1 if a buffer cannot
 * be allocated.
 */
int main(int argc, char *argv[])
{
  FLOAT *x, *y;
  blasint m, i;
  blasint inc_x = 1, inc_y = 1;
  FLOAT param[5] = {1, 2.0, 3.0, 4.0, 5.0};
  int loops = 1;
  int l;
  char *p;
  int from = 1;
  int to = 200;
  int step = 1;
  double time1, timeg;

  argc--;
  argv++;
  if (argc > 0) {
    from = atol(*argv);
    argc--;
    argv++;
  }
  if (argc > 0) {
    to = MAX(atol(*argv), from);
    argc--;
    argv++;
  }
  if (argc > 0) {
    step = atol(*argv);
    argc--;
    argv++;
  }

  if ((p = getenv("OPENBLAS_LOOPS")))
    loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX")))
    inc_x = atoi(p);
  if ((p = getenv("OPENBLAS_INCY")))
    inc_y = atoi(p);

  /* A non-positive step would never advance m past `to` (endless loop). */
  if (step < 1)
    step = 1;
  /* A non-positive loop count would make the average below divide by zero. */
  if (loops < 1)
    loops = 1;

  fprintf(
      stderr,
      "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n",
      from, to, step, inc_x, inc_y, loops);

  /* Buffers are sized once for the largest problem. */
  if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) ==
      NULL) {
    fprintf(stderr, "Out of Memory!!\n");
    exit(1);
  }
  if ((y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) ==
      NULL) {
    fprintf(stderr, "Out of Memory!!\n");
    exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");
  for (m = from; m <= to; m += step) {
    timeg = 0;
    fprintf(stderr, " %6d : ", (int)m);

    for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
      x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
    }
    for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
      y[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
    }

    /* Only the ROTM call itself sits between begin() and end(). */
    for (l = 0; l < loops; l++) {
      begin();
      ROTM(&m, x, &inc_x, y, &inc_y, param);
      end();
      time1 = getsec();
      timeg += time1;
    }

    timeg /= loops;
    fprintf(stderr, " %10.2f MFlops %10.6f sec\n",
            COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
  }

  free(x);
  free(y);
  return 0;
}

View File

@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SCAL
@ -43,9 +49,74 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Stand-in for gettimeofday() on Windows builds.
 *
 * Fills *tv from GetSystemTimeAsFileTime(): the 64-bit file time is divided
 * by 10 to obtain microseconds, shifted by DELTA_EPOCH_IN_MICROSECS to the
 * Unix epoch, and split into seconds and microseconds. A NULL tv is
 * accepted and left untouched. The tz argument is ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){
  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz; /* time zone information is not reported */

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);
      /* assemble the 64-bit value from its two 32-bit halves */
      tmpres |= ft.dwHighDateTime;
      tmpres <<= 32;
      tmpres |= ft.dwLowDateTime;
      /*converting file time to unix epoch*/
      tmpres /= 10; /*convert into microseconds*/
      tmpres -= DELTA_EPOCH_IN_MICROSECS;
      tv->tv_sec = (long)(tmpres / 1000000UL);
      tv->tv_usec = (long)(tmpres % 1000000UL);
    }
  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate `size` bytes from a private System V shared-memory segment
 * created with SHM_HUGETLB. The requested length is size + HUGE_PAGESIZE
 * masked down to a multiple of HUGE_PAGESIZE. The segment is marked for
 * removal (IPC_RMID) right after it is attached. Prints a message and
 * exits with status 1 if shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){
  void *address;
  int shmid;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);
  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
FLOAT *x, *y;
FLOAT alpha[2] = { 2.0, 2.0 };
blasint m, i;
blasint inc_x=1,inc_y=1;
@ -57,6 +128,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
@ -74,7 +146,11 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
srandom(getpid());
#endif
@ -87,20 +163,30 @@ int main(int argc, char *argv[]){
fprintf(stderr, " %6d : ", (int)m);
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
SCAL (&m, alpha, x, &inc_x);
}
end();
time1 = getsec();
gettimeofday( &stop, (struct timezone *)0);
timeg = time1 / loops;
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
#ifdef COMPLEX
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 6. * (double)m / timeg * 1.e-6, timeg);

View File

@ -1,146 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
/*
 * Choose the routine that this benchmark calls through the SPMV macro.
 * COMPLEX and DOUBLE are expected to be set by the build; together they
 * select one of sspmv / dspmv / cspmv / zspmv.  BLASFUNC is defined outside
 * this file and wraps the routine name.
 */
#undef SPMV
#ifndef COMPLEX
/* Real variants. */
#ifdef DOUBLE
#define SPMV BLASFUNC(dspmv)
#else
#define SPMV BLASFUNC(sspmv)
#endif
#else
/* Complex variants. */
#ifdef DOUBLE
#define SPMV BLASFUNC(zspmv)
#else
#define SPMV BLASFUNC(cspmv)
#endif
#endif
/*
 * SPMV benchmark driver.
 *
 * Command line: [from [to [step]]] -- range of problem sizes m to time
 *               (defaults 1, 200, 1; a supplied `to` is raised to at least
 *               `from`).
 * Environment:  OPENBLAS_LOOPS  repetitions per size,
 *               OPENBLAS_INCX / OPENBLAS_INCY  vector strides,
 *               OPENBLAS_UPLO   first character is passed as `uplo`.
 *
 * For every size the matrix buffer is filled once with random values, then
 * the routine is called `loops` times on freshly randomised vectors.  The
 * mean of the getsec() readings is converted to MFlops and written to
 * stderr.
 *
 * Returns 0 on completion; exits with status 1 if a buffer cannot be
 * allocated.
 */
int main(int argc, char *argv[]){

  FLOAT *a, *x, *y;
  FLOAT alpha[] = {1.0, 1.0};
  FLOAT beta [] = {1.0, 1.0};
  char uplo='L';
  blasint m, i, j;
  blasint inc_x=1,inc_y=1;
  int loops = 1;
  int l;
  char *p;

  int from =   1;
  int to   = 200;
  int step =   1;

  double time1,timeg;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to   = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX")))  inc_x = atoi(p);
  if ((p = getenv("OPENBLAS_INCY")))  inc_y = atoi(p);
  if ((p = getenv("OPENBLAS_UPLO")))  uplo=*p;

  /* A step below 1 would never move m past `to`, and a loop count below 1
     would make the average below a division by zero or by a negative
     number.  Fall back to the defaults in both cases. */
  if (step  < 1) step  = 1;
  if (loops < 1) loops = 1;

  fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);

  /* All buffers are sized for the largest problem (`to`) and reused. */
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops\n");

  for(m = from; m <= to; m += step)
  {

    timeg=0;

    fprintf(stderr, " %6dx%d : ", (int)m,(int)m);

    /* Matrix operand: random values in [-0.5, 0.5], filled once per size. */
    for(j = 0; j < m; j++){
      for(i = 0; i < m * COMPSIZE; i++){
        a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }
    }

    for (l=0; l<loops; l++)
    {

      /* Vector operands are re-randomised before every timed call. */
      for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
        x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
        y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      /* begin()/end()/getsec() are declared outside this file; getsec() is
         used here as the elapsed time of the bracketed call, in seconds. */
      begin();
      SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
      end();

      time1 = getsec();
      timeg += time1;

    }

    /* Mean time per call. */
    timeg /= loops;

    fprintf(stderr,
            " %10.2f MFlops\n",
            COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);

  }

  free(a);
  free(x);
  free(y);

  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,124 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
/*
 * Choose the routine that this benchmark calls through the SPR macro:
 * dspr when DOUBLE is defined, sspr otherwise.  Only real variants are
 * selected here.  BLASFUNC is defined outside this file and wraps the
 * routine name.
 */
#undef SPR
#ifdef DOUBLE
#define SPR BLASFUNC(dspr)
#else
#define SPR BLASFUNC(sspr)
#endif
/*
 * SPR benchmark driver.
 *
 * Command line: [from [to [step]]] -- range of problem sizes m to time
 *               (defaults 1, 200, 1; a supplied `to` is raised to at least
 *               `from`).
 * Environment:  OPENBLAS_UPLO   first character is passed as `uplo`,
 *               OPENBLAS_LOOPS  repetitions per size,
 *               OPENBLAS_INCX   stride of the vector operand.
 *
 * For every size the routine is called `loops` times, each time on freshly
 * randomised data.  The mean of the getsec() readings is reported on stderr
 * as MFlops together with the time in seconds.
 *
 * Returns 0 on completion; exits with status 1 if a buffer cannot be
 * allocated.
 */
int main(int argc, char *argv[]){

  FLOAT *a,*c;
  FLOAT alpha[] = {1.0, 1.0};
  blasint inc_x=1;
  int loops = 1;
  int l;
  char *p;

  char uplo='U';

  if ((p = getenv("OPENBLAS_UPLO")))  uplo=*p;
  if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX")))  inc_x = atoi(p);

  blasint m, i, j;

  int from =   1;
  int to   = 200;
  int step =   1;

  double time1,timeg;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to   = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  /* A step below 1 would never move m past `to`, and a loop count below 1
     would make the average below a division by zero or by a negative
     number.  Fall back to the defaults in both cases. */
  if (step  < 1) step  = 1;
  if (loops < 1) loops = 1;

  fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d\n", from, to, step,uplo,inc_x);

  /* Both buffers are sized for the largest problem (`to`) and reused. */
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops Time\n");

  for(m = from; m <= to; m += step)
  {

    timeg=0;

    fprintf(stderr, " %6d : ", (int)m);

    for (l=0; l<loops; l++)
    {

      /* The routine updates `a`, so it is re-randomised before each call. */
      for(j = 0; j < m; j++){
        for(i = 0; i < m * COMPSIZE; i++){
          a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        }
      }

      for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
        c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      /* begin()/end()/getsec() are declared outside this file; getsec() is
         used here as the elapsed time of the bracketed call, in seconds.
         `c` is passed as the vector operand and `a` as the matrix operand. */
      begin();
      SPR (&uplo, &m, alpha, c, &inc_x, a);
      end();

      time1 = getsec();
      timeg += time1;

    }

    /* Mean time per call. */
    timeg /= loops;

    fprintf(stderr,
            " %10.2f MFlops %10.6f sec\n",
            COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / timeg * 1.e-6, timeg);

  }

  free(a);
  free(c);

  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -1,135 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
/*
 * Choose the routine that this benchmark calls through the SPR2 macro:
 * dspr2 when DOUBLE is defined, sspr2 otherwise.  Only real variants are
 * selected here.  BLASFUNC is defined outside this file and wraps the
 * routine name.
 */
#undef SPR2
#ifdef DOUBLE
#define SPR2 BLASFUNC(dspr2)
#else
#define SPR2 BLASFUNC(sspr2)
#endif
/*
 * SPR2 benchmark driver.
 *
 * Command line: [from [to [step]]] -- range of problem sizes m to time
 *               (defaults 1, 200, 1; a supplied `to` is raised to at least
 *               `from`).
 * Environment:  OPENBLAS_UPLO   first character is passed as `uplo`,
 *               OPENBLAS_LOOPS  repetitions per size,
 *               OPENBLAS_INCX / OPENBLAS_INCY  strides of the two vector
 *               operands.
 *
 * For every size the routine is called `loops` times, each time on freshly
 * randomised data.  The mean of the getsec() readings is reported on stderr
 * as MFlops together with the time in seconds.
 *
 * Returns 0 on completion; exits with status 1 if a buffer cannot be
 * allocated.
 */
int main(int argc, char *argv[]){

  FLOAT *a,*b,*c;
  FLOAT alpha[] = {1.0, 1.0};
  blasint inc_x=1,inc_y=1;
  int loops = 1;
  int l;
  char *p;

  char uplo='U';

  if ((p = getenv("OPENBLAS_UPLO")))  uplo=*p;
  if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
  if ((p = getenv("OPENBLAS_INCX")))  inc_x = atoi(p);
  /* inc_y is printed, sizes buffer b and is passed to the routine, so it is
     made configurable in the same way as inc_x. */
  if ((p = getenv("OPENBLAS_INCY")))  inc_y = atoi(p);

  blasint m, i, j;

  int from =   1;
  int to   = 200;
  int step =   1;

  double time1,timeg;

  argc--;argv++;

  if (argc > 0) { from = atol(*argv); argc--; argv++;}
  if (argc > 0) { to   = MAX(atol(*argv), from); argc--; argv++;}
  if (argc > 0) { step = atol(*argv); argc--; argv++;}

  /* A step below 1 would never move m past `to`, and a loop count below 1
     would make the average below a division by zero or by a negative
     number.  Fall back to the defaults in both cases. */
  if (step  < 1) step  = 1;
  if (loops < 1) loops = 1;

  fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d Inc_y = %d\n", from, to, step,uplo,inc_x,inc_y);

  /* All buffers are sized for the largest problem (`to`) and reused. */
  if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

#ifdef __linux
  srandom(getpid());
#endif

  fprintf(stderr, " SIZE Flops Time\n");

  for(m = from; m <= to; m += step)
  {

    timeg=0;

    fprintf(stderr, " %6d : ", (int)m);

    for (l=0; l<loops; l++)
    {

      /* The routine updates `a`, so it is re-randomised before each call. */
      for(j = 0; j < m; j++){
        for(i = 0; i < m * COMPSIZE; i++){
          a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
        }
      }

      for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
        b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
        c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
      }

      /* begin()/end()/getsec() are declared outside this file; getsec() is
         used here as the elapsed time of the bracketed call, in seconds.
         `c` (stride inc_x) and `b` (stride inc_y) are the vector operands,
         `a` is the matrix operand. */
      begin();
      SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a);
      end();

      time1 = getsec();
      timeg += time1;

    }

    /* Mean time per call. */
    timeg /= loops;

    fprintf(stderr,
            " %10.2f MFlops %10.6f sec\n",
            COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6, timeg);

  }

  free(a);
  free(b);
  free(c);

  return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -25,7 +25,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SWAP
@ -44,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Minimal gettimeofday() stand-in, compiled only when __WIN32__ or
 * __WIN64__ is defined.
 *
 * tv  receives the current system time as seconds and microseconds; the
 *     value is derived from GetSystemTimeAsFileTime() and shifted by
 *     DELTA_EPOCH_IN_MICROSECS.  May be NULL, in which case nothing is
 *     written.
 * tz  accepted for signature compatibility only; never read.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz;

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* Assemble the two 32-bit halves into one 64-bit tick count. */
      tmpres |= ft.dwHighDateTime;
      tmpres <<= 32;
      tmpres |= ft.dwLowDateTime;

      /* Convert the file time to the Unix epoch. */
      tmpres /= 10;  /* convert into microseconds */
      tmpres -= DELTA_EPOCH_IN_MICROSECS;

      tv->tv_sec = (long)(tmpres / 1000000UL);
      tv->tv_usec = (long)(tmpres % 1000000UL);
    }

  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate `size` bytes from a System V shared-memory segment requested
 * with SHM_HUGETLB and return its attached address.  Intended as a
 * replacement for malloc (see the #define malloc that follows).
 * Prints a message and exits with status 1 if shmget or shmat fails.
 * NOTE(review): the enclosing #if ends in "&& 0", so this code is currently
 * compiled out.
 */
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
/* Fallback value for headers that do not provide SHM_HUGETLB. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
/* The requested size is rounded using HUGE_PAGESIZE (defined elsewhere;
   presumably a power of two -- confirm).
   NOTE(review): a full HUGE_PAGESIZE, not HUGE_PAGESIZE - 1, is added before
   masking, so a size that is already a multiple gets one extra page. */
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
/* shmat reports failure as (void *)-1. */
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
/* Mark the segment for removal right after attaching; presumably so it is
   released automatically once detached -- confirm. */
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x, *y;
@ -58,6 +128,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
@ -80,7 +151,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -104,13 +175,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();
gettimeofday( &start, (struct timezone *)0);
SWAP (&m, x, &inc_x, y, &inc_y );
end();
gettimeofday( &stop, (struct timezone *)0);
time1 = getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;

View File

@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "bench.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SYMM
@ -47,6 +53,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
/*
 * Minimal gettimeofday() stand-in, compiled only when __WIN32__ or
 * __WIN64__ is defined.
 *
 * tv  receives the current system time as seconds and microseconds; the
 *     value is derived from GetSystemTimeAsFileTime() and shifted by
 *     DELTA_EPOCH_IN_MICROSECS.  May be NULL, in which case nothing is
 *     written.
 * tz  accepted for signature compatibility only; never read.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz;

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* Assemble the two 32-bit halves into one 64-bit tick count. */
      tmpres |= ft.dwHighDateTime;
      tmpres <<= 32;
      tmpres |= ft.dwLowDateTime;

      /* Convert the file time to the Unix epoch. */
      tmpres /= 10;  /* convert into microseconds */
      tmpres -= DELTA_EPOCH_IN_MICROSECS;

      tv->tv_sec = (long)(tmpres / 1000000UL);
      tv->tv_usec = (long)(tmpres % 1000000UL);
    }

  return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
/*
 * Allocate `size` bytes from a System V shared-memory segment requested
 * with SHM_HUGETLB and return its attached address.  Intended as a
 * replacement for malloc (see the #define malloc that follows).
 * Prints a message and exits with status 1 if shmget or shmat fails.
 * NOTE(review): the enclosing #if ends in "&& 0", so this code is currently
 * compiled out.
 */
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
/* Fallback value for headers that do not provide SHM_HUGETLB. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
/* The requested size is rounded using HUGE_PAGESIZE (defined elsewhere;
   presumably a power of two -- confirm).
   NOTE(review): a full HUGE_PAGESIZE, not HUGE_PAGESIZE - 1, is added before
   masking, so a size that is already a multiple gets one extra page. */
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
/* shmat reports failure as (void *)-1. */
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
/* Mark the segment for removal right after attaching; presumably so it is
   released automatically once detached -- confirm. */
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *b, *c;
@ -66,6 +137,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
double time1;
argc--;argv++;
@ -90,7 +162,7 @@ int main(int argc, char *argv[]){
#ifdef __linux
#ifdef linux
srandom(getpid());
#endif
@ -103,19 +175,21 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
begin();
gettimeofday( &start, (struct timezone *)0);
SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
end();
gettimeofday( &stop, (struct timezone *)0);
time1 = getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops\n",

Some files were not shown because too many files have changed in this diff Show More