Merge branch 'develop' into risc-v

This commit is contained in:
Zhang Xianyi 2020-10-16 23:27:38 +08:00
commit d7ba7679b6
581 changed files with 47657 additions and 6952 deletions

View File

@ -8,7 +8,7 @@ platform:
steps:
- name: Build and Test
image: ubuntu:19.04
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32'
@ -32,7 +32,7 @@ platform:
steps:
- name: Build and Test
image: ubuntu:19.04
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV6 NUM_THREADS=32'
@ -141,3 +141,52 @@ steps:
- cmake $CMAKE_FLAGS ..
- make -j
- ctest -V
---
kind: pipeline
name: arm64_native_test
platform:
os: linux
arch: arm64
steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'USE_OPENMP=1'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran perl python g++
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C test $COMMON_FLAGS
- make -C ctest $COMMON_FLAGS
- make -C utest $COMMON_FLAGS
- make -C cpp_thread_test dgemm_tester
---
kind: pipeline
name: epyc_native_test
platform:
os: linux
arch: amd64
steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'USE_OPENMP=1'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran perl python g++
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C test $COMMON_FLAGS
- make -C ctest $COMMON_FLAGS
- make -C utest $COMMON_FLAGS
- make -C cpp_thread_test dgemm_tester

103
.github/workflows/dynamic_arch.yml vendored Normal file
View File

@ -0,0 +1,103 @@
name: continuous build
on: [push, pull_request]
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
fortran: [gfortran, flang]
build: [cmake, make]
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Compilation cache
uses: actions/cache@v2
with:
path: ~/.ccache
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
key: ${{ runner.os }}-ccache-${{ github.sha }}
# Restore any ccache cache entry, if none for
# ${{ runner.os }}-ccache-${{ github.sha }} exists
restore-keys: |
${{ runner.os }}-ccache-
- name: Print system information
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
cat /proc/cpuinfo
elif [ "$RUNNER_OS" == "macOS" ]; then
sysctl -a | grep machdep.cpu
else
echo "$RUNNER_OS not supported"
exit 1
fi
- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt-get install -y gfortran cmake ccache
elif [ "$RUNNER_OS" == "macOS" ]; then
brew install coreutils cmake ccache
else
echo "$RUNNER_OS not supported"
exit 1
fi
ccache -M 300M # Limit the ccache size; Github's overall cache limit is 5GB
- name: gfortran build
if: matrix.build == 'make' && matrix.fortran == 'gfortran'
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
export PATH="/usr/lib/ccache:${PATH}"
elif [ "$RUNNER_OS" == "macOS" ]; then
export PATH="$(brew --prefix)/opt/ccache/libexec:${PATH}"
else
echo "$RUNNER_OS not supported"
exit 1
fi
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0
- name: flang build
if: matrix.build == 'make' && matrix.fortran == 'flang'
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
export PATH="/usr/lib/ccache:${PATH}"
elif [ "$RUNNER_OS" == "macOS" ]; then
exit 0
else
echo "$RUNNER_OS not supported"
exit 1
fi
cd /usr/
sudo wget -nv https://github.com/flang-compiler/flang/releases/download/flang_20190329/flang-20190329-x86-70.tgz
sudo tar xf flang-20190329-x86-70.tgz
sudo rm flang-20190329-x86-70.tgz
cd -
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC=flang
- name: CMake gfortran build
if: matrix.build == 'cmake' && matrix.fortran == 'gfortran'
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
export PATH="/usr/lib/ccache:${PATH}"
elif [ "$RUNNER_OS" == "macOS" ]; then
export PATH="$(brew --prefix)/opt/ccache/libexec:${PATH}"
else
echo "$RUNNER_OS not supported"
exit 1
fi
mkdir build
cd build
cmake -DDYNAMIC_ARCH=1 -DNOFORTRAN=0 -DBUILD_WITHOUT_LAPACK=0 -DCMAKE_VERBOSE_MAKEFILE=ON -DCMAKE_BUILD_TYPE=Release ..
make -j$(nproc)

View File

@ -21,6 +21,7 @@ jobs:
build-OpenBLAS-with-Homebrew:
runs-on: macos-latest
env:
DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer
HOMEBREW_DEVELOPER: "ON"
HOMEBREW_DISPLAY_INSTALL_TIMES: "ON"
HOMEBREW_NO_ANALYTICS: "ON"

2
.gitignore vendored
View File

@ -70,6 +70,7 @@ test/SBLAT2.SUMM
test/SBLAT3.SUMM
test/ZBLAT2.SUMM
test/ZBLAT3.SUMM
test/SHBLAT3.SUMM
test/cblat1
test/cblat2
test/cblat3
@ -79,6 +80,7 @@ test/dblat3
test/sblat1
test/sblat2
test/sblat3
test/test_shgemm
test/zblat1
test/zblat2
test/zblat3

View File

@ -16,7 +16,6 @@ matrix:
before_script: &common-before
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
script:
- set -e
- make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
@ -34,6 +33,28 @@ matrix:
- TARGET_BOX=PPC64LE_LINUX
- BTYPE="BINARY=64 USE_OPENMP=1"
- <<: *test-ubuntu
os: linux
arch: s390x
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
env:
# for matrix annotation only
- TARGET_BOX=IBMZ_LINUX
- BTYPE="BINARY=64 USE_OPENMP=1"
- <<: *test-ubuntu
os: linux
dist: focal
arch: s390x
compiler: clang
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
env:
# for matrix annotation only
- TARGET_BOX=IBMZ_LINUX
- BTYPE="BINARY=64 USE_OPENMP=0 CC=clang"
- <<: *test-ubuntu
env:
- TARGET_BOX=LINUX64
@ -66,6 +87,40 @@ matrix:
- TARGET_BOX=LINUX32
- BTYPE="BINARY=32"
- os: linux
arch: ppc64le
dist: bionic
compiler: gcc
before_script:
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
- sudo apt-get update
- sudo apt-get install gcc-9 gfortran-9 -y
script:
- make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE
env:
# for matrix annotation only
- TARGET_BOX=PPC64LE_LINUX_P9
- os: linux
arch: ppc64le
dist: bionic
compiler: gcc
before_script:
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
- sudo apt-get update
- sudo apt-get install gcc-9 gfortran-9 -y
script:
- make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE
env:
# for matrix annotation only
- TARGET_BOX=PPC64LE_LINUX_P9
- os: linux
compiler: gcc
addons:
@ -98,7 +153,6 @@ matrix:
- sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
before_script: *common-before
script:
- set -e
# XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
- alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
@ -141,7 +195,6 @@ matrix:
before_script:
- COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32"
script:
- set -e
- mkdir build
- CONFIG=Release
- cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG
@ -168,6 +221,17 @@ matrix:
env:
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-8"
- <<: *test-macos
osx_image: xcode12
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
- brew update
- brew install gcc@10 # for gfortran
script:
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
env:
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"
- <<: *test-macos
osx_image: xcode10.0
env:
@ -180,6 +244,27 @@ matrix:
- CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
- BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
- <<: *test-macos
osx_image: xcode10.1
env:
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
- CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
- BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
- &test-graviton2
os: linux
arch: arm64-graviton2
dist: focal
group: edge
virt: lxd
compiler: gcc
addons:
apt:
packages:
- gfortran
script:
- travis_wait 45 make && make lapack-test
# whitelist
branches:
only:

View File

@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
project(OpenBLAS C ASM)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 9.dev)
set(OpenBLAS_PATCH_VERSION 10.dev)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
# Adhere to GNU filesystem layout conventions
@ -23,11 +23,14 @@ option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS fun
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64, aarch64 or ppc only)" OFF)
option(DYNAMIC_OLDER "Include specific support for older x86 cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF)
option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)" OFF)
option(USE_LOCKING "Use locks even in single-threaded builds to make them callable from multiple threads" OFF)
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON)
else()
set(NO_AFFINITY 1)
endif()
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF)
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)
# Add a prefix or suffix to all exported symbol names in the shared library.
# Avoids conflicts with other BLAS libraries, especially when using
@ -86,9 +89,13 @@ if (NOT NO_LAPACK)
list(APPEND SUBDIRS lapack)
endif ()
if (NOT DEFINED BUILD_BFLOAT16)
set (BUILD_BFLOAT16 false)
endif ()
# set which float types we want to build for
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
# if none are defined, build for all
# set(BUILD_BFLOAT16 true)
set(BUILD_SINGLE true)
set(BUILD_DOUBLE true)
set(BUILD_COMPLEX true)
@ -120,6 +127,11 @@ if (BUILD_COMPLEX16)
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
endif ()
if (BUILD_BFLOAT16)
message(STATUS "Building Half Precision")
list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing
endif ()
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
endif ()
@ -223,6 +235,10 @@ if (NOT MSVC AND NOT NOFORTRAN)
if(NOT NO_CBLAS)
add_subdirectory(ctest)
endif()
add_subdirectory(lapack-netlib/TESTING)
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
add_subdirectory(cpp_thread_test)
endif()
endif()
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
@ -234,11 +250,11 @@ if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
if (NOT MSVC)
target_link_libraries(${OpenBLAS_LIBNAME} "-Wl,-allow-multiple-definition")
else()
target_link_libraries(${OpenBLAS_LIBNAME} "/FORCE:MULTIPLE")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /FORCE:MULTIPLE")
endif()
endif()
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFIX} STREQUAL "")
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
if (NOT DEFINED ARCH)
set(ARCH_IN "x86_64")
else()
@ -347,10 +363,21 @@ endif()
if(NOT NO_CBLAS)
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
if (NOT ${SYMBOLPREFIX} STREQUAL "")
string(REPLACE " cblas" " ${SYMBOLPREFIX}cblas" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REPLACE " openblas" " ${SYMBOLPREFIX}openblas" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
string (REPLACE " ${SYMBOLPREFIX}openblas_complex" " openblas_complex" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REPLACE " goto" " ${SYMBOLPREFIX}goto" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
endif()
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
string(REGEX REPLACE "(cblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REGEX REPLACE "(openblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
string(REGEX REPLACE "(openblas_complex[^ ]*)${SYMBOLSUFFIX}" "\\1" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REGEX REPLACE "(goto[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
endif()
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
@ -367,11 +394,9 @@ if(NOT NO_LAPACKE)
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
endif()
include(FindPkgConfig QUIET)
if(PKG_CONFIG_FOUND)
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
endif()
# Install pkg-config files
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
# GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".

View File

@ -180,3 +180,14 @@ In chronological order:
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels
* [2020-01-07] optimize AVX2 SGEMM and STRMM
* Rajalakshmi Srinivasaraghavan <https://github.com/RajalakshmiSR>
* [2020-04-15] Half-precision GEMM for bfloat16
* Marius Hillenbrand <https://github.com/mhillenibm>
* [2020-05-12] Revise dynamic architecture detection for IBM z
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support
* Danfeng Zhang <https://github.com/craft-zhang>
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53

View File

@ -1,4 +1,121 @@
OpenBLAS ChangeLog
====================================================================
Version 0.3.10
14-Jun-2020
common:
* Improved thread locking behaviour in blas_server and parallel getrf
* Imported bugfix 394 from LAPACK (spurious reference to "XERBL"
due to overlong lines)
* Imported bugfix 403 from LAPACK (compile option "recursive" required
for correctness with Intel and PGI)
* Imported bugfix 408 from LAPACK (wrong scaling in ZHEEQUB)
* Imported bugfix 411 from LAPACK (infinite loop in LARGV/LARTG/LARTGP)
* Fixed mismatches between BUFFERSIZE and GEMM_UNROLL parameters that
could lead to crashes at large matrix sizes
* Restored internal soname in dynamic libraries on FreeBSD and Dragonfly
* Added API (openblas_setaffinity) to set the thread affinity on Linux
* Added initial infrastructure for half-precision floating point
(bfloat16) support with a generic implementation of SHGEMM
* Added CMAKE build system support for building the cblas_Xgemm3m
functions
* Fixed CMAKE support for building in a path with embedded spaces
* Fixed CMAKE (non)handling of NO_EXPRECISION and MAX_STACK_ALLOC
* Fixed GCC version detection in the Makefiles
* Allowed overriding the names of AR, AS and LD in Makefile builds
POWER:
* Fixed big-endian POWER8 ELFv2 builds on FreeBSD
* Fixed GCC version checks and DYNAMIC_ARCH builds on POWER9
* Fixed CMAKE build support for POWER9
* fixed a potential race condition in the thread buffer allocation
* Worked around LAPACK test failures on PPC G4
MIPS:
* Fixed a potential race condition in the thread buffer allocation
* Added support for MIPS 24K/24KE family based on P5600 kernels
MIPS64:
* fixed a potential race condition in the thread buffer allocation
* Added TARGET=GENERIC
ARMV7:
* Fixed a race condition in the thread buffer allocation
ARMV8:
* Fixed a race condition in the thread buffer allocation
* Fixed zero initialisation in the assembly for SGEMM and DGEMM BETA
* Improved performance of the ThunderX2 DAXPY kernel
* Added an optimized SGEMM kernel for Cortex A53
* Fixed Makefile support for INTERFACE64 (8-byte integer)
x86_64:
* Fixed a syntax error in the CMAKE setup for SkylakeX
* Improved performance of STRSM on Haswell, SkylakeX and Ryzen
* Improved SGEMM performance on SGEMM for workloads with ldc a
multiple of 1024
* Improved DGEMM performance on Skylake X
* Fixed unwanted AVX512-dependency of SGEMM in DYNAMIC_ARCH
builds created on SkylakeX
* Removed data alignment requirement in the SSE2 copy kernels
that could cause spurious crashes
* Added a workaround for an optimizer bug in AppleClang 11.0.3
* Fixed LAPACK test failures due to wrong options for Intel Fortran
* Fixed compilation and LAPACK test results with recent Flang
and AMD AOCC
* Fixed DYNAMIC_ARCH builds with CMAKE on OS X
* Fixed missing exports of cblas_i?amin, cblas_i?min, cblas_i?max,
cblas_?sum, cblas_?gemm3m in the shared library on OS
* Fixed reporting of cpu name in DYNAMIC_ARCH builds (would sometimes
show the name of an older generation chip supported by the same kernels)
IBM Z:
* Improved performance of SGEMM/STRMM and DGEMM/DTRMM on Z14
====================================================================
Version 0.3.9
1-Mar-2020
common:
* Fixed a miscompilation of the GETRF functions with CMAKE
* Imported bugfix 390 from LAPACK (missing NaN propagation in xCOMBSSQ)
* The size of the memory buffer used for splitting GEMM tasks across
multiple threads can now be configured in the build system.
POWER:
* Fixed several compilation problems related to endianness
and ELF version on POWER8 and POWER9
* Fixed use of the absolute value IAMIN/IAMAX instead of IMIN/IMAX
* Fixed a race condition in the level3 blas code
MIPS64:
* Fixed use of the absoltute value IAMIN/IAMAX instead of IMIN/IMAX
ARMV7:
* Fixed a race condition in the level3 blas code
* Fixed compilation on Android
ARMV8:
* Added support for Ampere EMAG8180
* Added support for Neoverse N1
* Improved performance of the blas_lock function
* Fixed a race condition in the level3 blas code
* Fixed a performance regression on TSV110-based servers
x86_64:
* Fixed a long-standing error with undeclared register overwrites
in the DSCAL microkernel for HASWELL,SKYLAKEX and ZEN
* Fixed a long-standing bug in the SSE implementation of IAMAX
* Fixed a CMAKE build failure with DYNAMIC_ARCH
* Fixed cpu autodetection of Goldmont+, Cannon Lake and Ice Lake
* Fixed a compilation failure on OSX with compiler name containing dash
* Fixed compilation with MinGW on SkylakeX
* Improved speed of the AVX512 GEMM3M kernel on SkylakeX
* Added an AVX512 STRMM kernel for SkylakeX
* Improved GEMM performance on Haswell and Zen
zarch:
* fixed compilation of the DYNAMIC_ARCH code
====================================================================
Version 0.3.8
9-Feb-2020

9
Jenkinsfile vendored Normal file
View File

@ -0,0 +1,9 @@
node {
stage('Checkout') {
checkout
}
stage('Build') {
sh("make")
}
}

View File

@ -56,10 +56,21 @@ ifneq ($(INTERFACE64), 0)
@echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) "
endif
endif
@echo " C compiler ... $(C_COMPILER) (command line : $(CC))"
@$(CC) --version > /dev/null 2>&1;\
if [ $$? -eq 0 ]; then \
cverinfo=`$(CC) --version | sed -n '1p'`; \
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
else \
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
fi
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
@echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))"
@$(FC) --version > /dev/null 2>&1;\
if [ $$? -eq 0 ]; then \
fverinfo=`$(FC) --version | sed -n '1p'`; \
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
else \
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\
fi
endif
ifneq ($(OSNAME), AIX)
@echo -n " Library Name ... $(LIBNAME)"
@ -68,9 +79,13 @@ else
endif
ifndef SMP
@echo " (Single threaded) "
@echo " (Single-threading) "
else
@echo " (Multi threaded; Max num-threads is $(NUM_THREADS))"
@echo " (Multi-threading; Max num-threads is $(NUM_THREADS))"
endif
ifeq ($(DYNAMIC_ARCH), 1)
@echo " Supporting multiple $(ARCH) cpu models with minimum requirement for the common code being $(CORE)"
endif
ifeq ($(USE_OPENMP), 1)
@ -97,12 +112,12 @@ endif
shared :
ifneq ($(NO_SHARED), 1)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
@$(MAKE) -C exports so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
@$(MAKE) -C exports so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif
@ -126,7 +141,7 @@ ifndef NO_FBLAS
$(MAKE) -C test all
endif
$(MAKE) -C utest all
ifndef NO_CBLAS
ifneq ($(NO_CBLAS), 1)
$(MAKE) -C ctest all
ifeq ($(CPP_THREAD_SAFETY_TEST), 1)
$(MAKE) -C cpp_thread_test all
@ -229,7 +244,7 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
endif
ifndef NO_LAPACKE
ifneq ($(NO_LAPACKE), 1)
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
endif
endif
@ -249,6 +264,7 @@ lapack_prebuild :
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
-@echo "FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
@ -285,6 +301,18 @@ else
endif
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_SINGLE), 1)
-@echo "BUILD_SINGLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_DOUBLE), 1)
-@echo "BUILD_DOUBLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_COMPLEX), 1)
-@echo "BUILD_COMPLEX = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_COMPLEX16), 1)
-@echo "BUILD_COMPLEX16 = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
@ -317,7 +345,7 @@ lapack-test :
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
ifneq ($(CROSS), 1)
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \
( cd $(NETLIB_LAPACK_DIR)/INSTALL; $(MAKE) all; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING)
endif
@ -349,11 +377,12 @@ clean ::
@$(MAKE) -C kernel clean
#endif
@$(MAKE) -C reference clean
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0
ifeq ($(OSNAME), Darwin)
@rm -rf getarch.dSYM getarch_2nd.dSYM
endif
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
@rm -f cblas.tmp cblas.tmp2
@touch $(NETLIB_LAPACK_DIR)/make.inc
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h

View File

@ -24,6 +24,23 @@ CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
endif
# Use a72 tunings because Neoverse-N1 is only available
# in GCC>=9
ifeq ($(CORE), NEOVERSEN1)
ifeq ($(GCCVERSIONGTEQ7), 1)
ifeq ($(GCCVERSIONGTEQ9), 1)
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
else
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
else
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif
ifeq ($(CORE), THUNDERX)
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
@ -39,6 +56,21 @@ CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif
ifeq ($(CORE), THUNDERX3T110)
ifeq ($(GCCVERSIONGTEQ10), 1)
CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
else
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif
endif
ifeq ($(CORE), VORTEX)
CCOMMON_OPT += -march=armv8.3-a
FCOMMON_OPT += -march=armv8.3-a
endif
ifeq ($(GCCVERSIONGTEQ9), 1)
ifeq ($(CORE), TSV110)
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110

View File

@ -13,6 +13,14 @@ OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
PKG_EXTRALIB := $(EXTRALIB)
ifeq ($(USE_OPENMP), 1)
ifeq ($(C_COMPILER), PGI)
PKG_EXTRALIB += -lomp
else
PKG_EXTRALIB += -lgomp
endif
endif
.PHONY : install
.NOTPARALLEL : install
@ -45,7 +53,22 @@ install : lib.grd
ifndef NO_CBLAS
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
@cp cblas.h cblas.tmp
ifdef SYMBOLPREFIX
@sed 's/cblas[^( ]*/$(SYMBOLPREFIX)&/g' cblas.tmp > cblas.tmp2
@sed 's/openblas[^( ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
#change back any openblas_complex_float and double that got hit
@sed 's/$(SYMBOLPREFIX)openblas_complex_/openblas_complex_/g' cblas.tmp > cblas.tmp2
@sed 's/goto[^( ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
endif
ifdef SYMBOLSUFFIX
@sed 's/cblas[^( ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp > cblas.tmp2
@sed 's/openblas[^( ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
#change back any openblas_complex_float and double that got hit
@sed 's/\(openblas_complex_\)\([^ ]*\)$(SYMBOLSUFFIX)/\1\2 /g' cblas.tmp > cblas.tmp2
@sed 's/goto[^( ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
endif
@sed 's/common/openblas_config/g' cblas.tmp > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
endif
ifneq ($(OSNAME), AIX)
@ -68,14 +91,14 @@ endif
#for install shared library
ifneq ($(NO_SHARED),1)
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so
@ -132,7 +155,7 @@ endif
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'extralib='$(EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'extralib='$(PKG_EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@ -168,4 +191,3 @@ endif
@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo Install OK!

View File

@ -9,23 +9,71 @@ else
USE_OPENMP = 1
endif
ifeq ($(CORE), POWER10)
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
endif
ifeq ($(CORE), POWER9)
ifeq ($(USE_OPENMP), 1)
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
ifeq ($(C_COMPILER), GCC)
ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
CCOMMON_OPT += -mcpu=power8 -mtune=power8
else
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math
CCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
CCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
endif
ifneq ($(F_COMPILER), PGI)
FCOMMON_OPT += -O2 -frecursive -fno-fast-math
ifeq ($(C_COMPILER), GCC)
ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
FCOMMON_OPT += -mcpu=power8 -mtune=power8
else
FCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
FCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
FCOMMON_OPT += -O2 -Mrecursive
endif
endif
ifeq ($(CORE), POWER8)
ifeq ($(USE_OPENMP), 1)
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
else
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
endif
ifneq ($(F_COMPILER), PGI)
ifeq ($(OSNAME), AIX)
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
else
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
endif
else
FCOMMON_OPT += -O2 -Mrecursive
endif
endif
ifeq ($(USE_OPENMP), 1)
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -DUSE_OPENMP -fopenmp
else
CCOMMON_OPT += -DUSE_OPENMP -mp
endif
ifneq ($(F_COMPILER), PGI)
FCOMMON_OPT += -DUSE_OPENMP -fopenmp
else
FCOMMON_OPT += -DUSE_OPENMP -mp
endif
endif
@ -68,6 +116,9 @@ CCOMMON_OPT += -mpowerpc64 -maix64
ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mpowerpc64 -maix64
endif
ifeq ($(F_COMPILER), GFORTRAN)
FCOMMON_OPT += -mpowerpc64 -maix64
endif
ifeq ($(COMPILER_F77), xlf)
FCOMMON_OPT += -q64
endif

View File

@ -17,7 +17,11 @@ ifdef CPUIDEMU
EXFLAGS = -DCPUIDEMU -DVENDOR=99
endif
ifeq ($(TARGET), 1004K)
ifeq ($(TARGET), MIPS24K)
TARGET_FLAGS = -mips32r2
endif
ifeq ($(TARGET), MIPS1004K)
TARGET_FLAGS = -mips32r2
endif

View File

@ -3,7 +3,7 @@
#
# This library's version
VERSION = 0.3.9.dev
VERSION = 0.3.10.dev
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@ -272,7 +272,33 @@ COMMON_PROF = -pg
# work at all.
#
# CPP_THREAD_SAFETY_TEST = 1
#
# use this to run only the less memory-hungry GEMV test
# CPP_THREAD_SAFETY_GEMV = 1
# If you want to enable the experimental BFLOAT16 support
# BUILD_BFLOAT16 = 1
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
# will be allocated on the heap rather than the stack. (This array alone requires
# NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu
# counts, but obviously it is not the only item that ends up on the stack.
# The default value of 32 ensures that the overall requirement is compatible
# with the default 1MB stacksize imposed by having the Java VM loaded without use
# of its -Xss parameter.
# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible
# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the java
# VM e.g. in Octave or with the java-based libhdfs in numpy or scipy code
# BLAS3_MEM_ALLOC_THRESHOLD = 160
# the below is not yet configurable, use cmake if you need to build only select types
BUILD_SINGLE = 1
BUILD_DOUBLE = 1
BUILD_COMPLEX = 1
BUILD_COMPLEX16 = 1
# End of user configuration
#

View File

@ -9,7 +9,7 @@ ifndef TOPDIR
TOPDIR = .
endif
# If ARCH is not set, we use the host system's architecture for getarch compile options.
# If ARCH is not set, we use the host system's architecture for getarch compile options.
ifndef ARCH
HOSTARCH := $(shell uname -m)
else
@ -21,8 +21,14 @@ ifeq ($(ARCH), amd64)
override ARCH=x86_64
else ifeq ($(ARCH), powerpc64)
override ARCH=power
else ifeq ($(ARCH), powerpc)
override ARCH=power
else ifeq ($(ARCH), i386)
override ARCH=x86
else ifeq ($(ARCH), armv6)
override ARCH=arm
else ifeq ($(ARCH), armv7)
override ARCH=arm
else ifeq ($(ARCH), aarch64)
override ARCH=arm64
else ifeq ($(ARCH), zarch)
@ -67,6 +73,18 @@ endif
#
# Beginning of system configuration
#
ifneq ($(BUILD_SINGLE),1)
ifneq ($(BUILD_DOUBLE),1)
ifneq ($(BUILD_COMPLEX),1)
ifneq ($(BUILD_COMPLEX16),1)
override BUILD_SINGLE=1
override BUILD_DOUBLE=1
override BUILD_COMPLEX=1
override BUILD_COMPLEX16=1
endif
endif
endif
endif
ifndef HOSTCC
HOSTCC = $(CC)
@ -86,6 +104,9 @@ endif
ifeq ($(TARGET), SKYLAKEX)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET), COOPERLAKE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
@ -107,6 +128,9 @@ endif
ifeq ($(TARGET), ARMV8)
GETARCH_FLAGS := -DFORCE_ARMV7
endif
ifeq ($(TARGET), POWER8)
GETARCH_FLAGS := -DFORCE_POWER6
endif
endif
@ -125,6 +149,9 @@ endif
ifeq ($(TARGET_CORE), SKYLAKEX)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET_CORE), COOPERLAKE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET_CORE), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
@ -261,10 +288,10 @@ endif
ARFLAGS =
CPP = $(COMPILER) -E
AR = $(CROSS_SUFFIX)ar
AS = $(CROSS_SUFFIX)as
LD = $(CROSS_SUFFIX)ld
RANLIB = $(CROSS_SUFFIX)ranlib
AR ?= $(CROSS_SUFFIX)ar
AS ?= $(CROSS_SUFFIX)as
LD ?= $(CROSS_SUFFIX)ld
RANLIB ?= $(CROSS_SUFFIX)ranlib
NM = $(CROSS_SUFFIX)nm
DLLWRAP = $(CROSS_SUFFIX)dllwrap
OBJCOPY = $(CROSS_SUFFIX)objcopy
@ -277,6 +304,25 @@ NO_LAPACK = 1
override FEXTRALIB =
endif
ifeq ($(C_COMPILER), GCC)
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
GCCVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 7)
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
# Note that the behavior of -dumpversion is compile-time-configurable for
# gcc-7.x and newer. Use -dumpfullversion there
ifeq ($(GCCVERSIONGTEQ7),1)
GCCDUMPVERSION_PARAM := -dumpfullversion
else
GCCDUMPVERSION_PARAM := -dumpversion
endif
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
endif
#
# OS dependent settings
#
@ -323,13 +369,7 @@ ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -DMS_ABI
endif
ifeq ($(C_COMPILER), GCC)
#Version tests for supporting specific features (MS_ABI, POWER9 intrinsics)
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGT4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 4)
GCCVERSIONGT5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 5)
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
ifeq ($(GCCVERSIONGT4), 1)
# GCC Major version > 4
# It is compatible with MSVC ABI.
@ -343,7 +383,6 @@ ifeq ($(GCCMINORVERSIONGTEQ7), 1)
CCOMMON_OPT += -DMS_ABI
endif
endif
endif
# Ensure the correct stack alignment on Win32
# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
@ -535,7 +574,7 @@ DYNAMIC_CORE += HASWELL ZEN
endif
ifneq ($(NO_AVX512), 1)
ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += SKYLAKEX
DYNAMIC_CORE += SKYLAKEX COOPERLAKE
endif
endif
endif
@ -554,23 +593,54 @@ DYNAMIC_CORE += CORTEXA53
DYNAMIC_CORE += CORTEXA57
DYNAMIC_CORE += CORTEXA72
DYNAMIC_CORE += CORTEXA73
DYNAMIC_CORE += NEOVERSEN1
DYNAMIC_CORE += FALKOR
DYNAMIC_CORE += THUNDERX
DYNAMIC_CORE += THUNDERX2T99
DYNAMIC_CORE += TSV110
DYNAMIC_CORE += EMAG8180
DYNAMIC_CORE += THUNDERX3T110
endif
ifeq ($(ARCH), zarch)
DYNAMIC_CORE = Z13
DYNAMIC_CORE += Z14
DYNAMIC_CORE = ZARCH_GENERIC
# if the compiler accepts -march=arch11 or -march=z13 and can compile a file
# with z13-specific inline assembly, then we can include support for Z13.
# note: -march=z13 is equivalent to -march=arch11 yet some compiler releases
# only support one or the other.
# note: LLVM version 6.x supported -march=z13 yet could not handle vector
# registers in inline assembly, so the check for supporting the -march flag is
# not enough.
ZARCH_TEST_COMPILE=-c $(TOPDIR)/kernel/zarch/damin_z13.c -I$(TOPDIR) -o /dev/null > /dev/null 2> /dev/null
ZARCH_CC_SUPPORTS_ARCH11=$(shell $(CC) -march=arch11 $(ZARCH_TEST_COMPILE) && echo 1)
ZARCH_CC_SUPPORTS_Z13=$(shell $(CC) -march=z13 $(ZARCH_TEST_COMPILE) && echo 1)
ifeq ($(or $(ZARCH_CC_SUPPORTS_ARCH11), $(ZARCH_CC_SUPPORTS_Z13)), 1)
DYNAMIC_CORE += Z13
CCOMMON_OPT += -DDYN_Z13
else
$(info OpenBLAS: Not building Z13 kernels because the compiler $(CC) does not support it)
endif
# as above for z13, check for -march=arch12 and z14 support in the compiler.
ZARCH_CC_SUPPORTS_ARCH12=$(shell $(CC) -march=arch12 $(ZARCH_TEST_COMPILE) && echo 1)
ZARCH_CC_SUPPORTS_Z14=$(shell $(CC) -march=z14 $(ZARCH_TEST_COMPILE) && echo 1)
ifeq ($(or $(ZARCH_CC_SUPPORTS_ARCH12), $(ZARCH_CC_SUPPORTS_Z14)), 1)
DYNAMIC_CORE += Z14
CCOMMON_OPT += -DDYN_Z14
else
$(info OpenBLAS: Not building Z14 kernels because the compiler $(CC) does not support it)
endif
endif # ARCH zarch
ifeq ($(ARCH), power)
DYNAMIC_CORE = POWER6
DYNAMIC_CORE += POWER8
ifneq ($(C_COMPILER), GCC)
DYNAMIC_CORE += POWER9
DYNAMIC_CORE += POWER10
endif
ifeq ($(C_COMPILER), GCC)
ifeq ($(GCCVERSIONGT5), 1)
@ -578,6 +648,15 @@ DYNAMIC_CORE += POWER9
else
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
endif
ifeq ($(GCCVERSIONGTEQ11), 1)
DYNAMIC_CORE += POWER10
else ifeq ($(GCCVERSIONGTEQ10), 1)
ifeq ($(GCCMINORVERSIONGTEQ2), 1)
DYNAMIC_CORE += POWER10
endif
else
$(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
endif
endif
endif
@ -633,6 +712,16 @@ endif
ifeq ($(ARCH), arm64)
NO_BINARY_MODE = 1
BINARY_DEFINED = 1
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
ifeq ($(F_COMPILER), GFORTRAN)
FCOMMON_OPT += -fdefault-integer-8
endif
ifeq ($(F_COMPILER), FLANG)
FCOMMON_OPT += -i8
endif
endif
endif
endif
@ -678,7 +767,12 @@ CCOMMON_OPT += -march=mips64
FCOMMON_OPT += -march=mips64
endif
ifeq ($(CORE), 1004K)
ifeq ($(CORE), MIPS24K)
CCOMMON_OPT += -mips32r2 -mtune=24kc $(MSA_FLAGS)
FCOMMON_OPT += -mips32r2 -mtune=24kc $(MSA_FLAGS)
endif
ifeq ($(CORE), MIPS1004K)
CCOMMON_OPT += -mips32r2 $(MSA_FLAGS)
FCOMMON_OPT += -mips32r2 $(MSA_FLAGS)
endif
@ -725,8 +819,19 @@ endif
ifeq ($(C_COMPILER), PGI)
ifdef BINARY64
ifeq ($(ARCH), x86_64)
CCOMMON_OPT += -tp p7-64 -D__MMX__ -Mnollvm
else
ifeq ($(ARCH), power)
ifeq ($(CORE), POWER8)
CCOMMON_OPT += -tp pwr8
endif
ifeq ($(CORE), POWER9)
CCOMMON_OPT += -tp pwr9
endif
endif
endif
else
CCOMMON_OPT += -tp p7
endif
endif
@ -745,6 +850,15 @@ endif
ifeq ($(F_COMPILER), FLANG)
CCOMMON_OPT += -DF_INTERFACE_FLANG
FCOMMON_OPT += -Mrecursive -Kieee
ifeq ($(OSNAME), Linux)
ifeq ($(ARCH), x86_64)
FLANG_VENDOR := $(shell expr `$(FC) --version|cut -f 1 -d "."|head -1`)
ifeq ($(FLANG_VENDOR),AOCC)
FCOMMON_OPT += -fno-unroll-loops
endif
endif
endif
ifdef BINARY64
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
@ -842,6 +956,7 @@ ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -i8
endif
endif
FCOMMON_OPT += -recursive -fp-model strict -assume protect-parens
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -fopenmp
endif
@ -881,10 +996,22 @@ ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -i8
endif
endif
ifeq ($(ARCH), x86_64)
FCOMMON_OPT += -tp p7-64
else
ifeq ($(ARCH), power)
ifeq ($(CORE), POWER8)
FCOMMON_OPT += -tp pwr8
endif
ifeq ($(CORE), POWER9)
FCOMMON_OPT += -tp pwr9
endif
endif
endif
else
FCOMMON_OPT += -tp p7
endif
FCOMMON_OPT += -Mrecursive
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -mp
endif
@ -1109,6 +1236,22 @@ ifeq ($(USE_TLS), 1)
CCOMMON_OPT += -DUSE_TLS
endif
ifeq ($(BUILD_BFLOAT16), 1)
CCOMMON_OPT += -DBUILD_BFLOAT16
endif
ifeq ($(BUILD_SINGLE), 1)
CCOMMON_OPT += -DBUILD_SINGLE=1
endif
ifeq ($(BUILD_DOUBLE), 1)
CCOMMON_OPT += -DBUILD_DOUBLE=1
endif
ifeq ($(BUILD_COMPLEX), 1)
CCOMMON_OPT += -DBUILD_COMPLEX=1
endif
ifeq ($(BUILD_COMPLEX16), 1)
CCOMMON_OPT += -DBUILD_COMPLEX16=1
endif
CCOMMON_OPT += -DVERSION=\"$(VERSION)\"
ifndef SYMBOLPREFIX
@ -1135,6 +1278,9 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
include $(TOPDIR)/Makefile.$(ARCH)
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME
endif
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"
ifeq ($(CORE), PPC440)
@ -1227,7 +1373,6 @@ endif
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
#MAKEOVERRIDES =
@ -1334,6 +1479,7 @@ export ARCH
export CORE
export LIBCORE
export __BYTE_ORDER__
export ELF_VERSION
export PGCPATH
export CONFIG
export CC
@ -1379,7 +1525,10 @@ export KERNELDIR
export FUNCTION_PROFILE
export TARGET_CORE
export NO_AVX512
export BUILD_BFLOAT16
export SBGEMM_UNROLL_M
export SBGEMM_UNROLL_N
export SGEMM_UNROLL_M
export SGEMM_UNROLL_N
export DGEMM_UNROLL_M

View File

@ -1,16 +1,18 @@
SBBLASOBJS_P = $(SBBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
DBLASOBJS_P = $(DBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
SBEXTOBJS_P = $(SBEXTOBJS:.$(SUFFIX)=.$(PSUFFIX))
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))
BLASOBJS = $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
BLASOBJS_P = $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P)
BLASOBJS = $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
BLASOBJS_P = $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)
ifdef EXPRECISION
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
@ -22,19 +24,23 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
endif
$(SBBLASOBJS) $(SBBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
$(SBEXTOBJS) $(SBEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
$(SBBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(SBEXTOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
libs :: $(BLASOBJS) $(COMMONOBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

View File

@ -54,3 +54,19 @@ LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm
else
LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm
endif
ifdef HAVE_SSE3
ifndef DYNAMIC_ARCH
CCOMMON_OPT += -msse3
FCOMMON_OPT += -msse3
ifdef HAVE_SSSE3
CCOMMON_OPT += -mssse3
FCOMMON_OPT += -mssse3
endif
ifdef HAVE_SSE4_1
CCOMMON_OPT += -msse4.1
FCOMMON_OPT += -msse4.1
endif
endif
endif

View File

@ -8,6 +8,21 @@ endif
endif
endif
ifdef HAVE_SSE3
ifndef DYNAMIC_ARCH
CCOMMON_OPT += -msse3
FCOMMON_OPT += -msse3
ifdef HAVE_SSSE3
CCOMMON_OPT += -mssse3
FCOMMON_OPT += -mssse3
endif
ifdef HAVE_SSE4_1
CCOMMON_OPT += -msse4.1
FCOMMON_OPT += -msse4.1
endif
endif
endif
ifeq ($(CORE), SKYLAKEX)
ifndef DYNAMIC_ARCH
ifndef NO_AVX512
@ -15,28 +30,66 @@ CCOMMON_OPT += -march=skylake-avx512
FCOMMON_OPT += -march=skylake-avx512
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
endif
ifeq ($(CORE), HASWELL)
ifeq ($(CORE), COOPERLAKE)
ifndef DYNAMIC_ARCH
ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC)
# cooperlake support was added in 10.1
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 1)
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
CCOMMON_OPT += -march=cooperlake
FCOMMON_OPT += -march=cooperlake
endif
endif
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
endif
ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE))
ifndef DYNAMIC_ARCH
ifndef NO_AVX2
ifeq ($(C_COMPILER), GCC)
# AVX2 support was added in 4.7.0
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
CCOMMON_OPT += -mavx2
endif
endif
ifeq ($(F_COMPILER), GFORTRAN)
# AVX2 support was added in 4.7.0
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
FCOMMON_OPT += -mavx2
endif
endif
endif
endif
endif

View File

@ -5,6 +5,12 @@ FCOMMON_OPT += -march=z13 -mzvector
endif
ifeq ($(CORE), Z14)
CCOMMON_OPT += -march=z14 -mzvector
CCOMMON_OPT += -march=z14 -mzvector -O3
FCOMMON_OPT += -march=z14 -mzvector
endif
# Enable floating-point expression contraction for clang, since it is the
# default for gcc
ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -ffp-contract=on
endif

View File

@ -6,8 +6,11 @@ Travis CI: [![Build Status](https://travis-ci.org/xianyi/OpenBLAS.svg?branch=dev
AppVeyor: [![Build status](https://ci.appveyor.com/api/projects/status/09sohd35n8nkkx64/branch/develop?svg=true)](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
Drone CI: [![Build Status](https://cloud.drone.io/api/badges/xianyi/OpenBLAS/status.svg?branch=develop)](https://cloud.drone.io/xianyi/OpenBLAS/)
[![Build Status](https://dev.azure.com/xianyi/OpenBLAS/_apis/build/status/xianyi.OpenBLAS?branchName=develop)](https://dev.azure.com/xianyi/OpenBLAS/_build/latest?definitionId=1&branchName=develop)
## Introduction
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
@ -25,7 +28,8 @@ You can download them from [file hosting on sourceforge.net](https://sourceforge
## Installation from Source
Download from project homepage, https://xianyi.github.com/OpenBLAS/, or check out the code
using Git from https://github.com/xianyi/OpenBLAS.git.
using Git from https://github.com/xianyi/OpenBLAS.git. (If you want the most up to date version, be
sure to use the develop branch - master is several years out of date due to a change of maintainership.)
Buildtime parameters can be chosen in Makefile.rule, see there for a short description of each option.
Most can also be given directly on the make or cmake command line.
@ -42,7 +46,10 @@ Building OpenBLAS requires the following to be installed:
Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically.
To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`.
The full target list is in the file `TargetList.txt`.
The full target list is in the file `TargetList.txt`. For building with `cmake`, the
usual conventions apply, i.e. create a build directory either underneath the toplevel
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path
to the source tree and any build options you plan to set.
### Cross compile
@ -55,6 +62,10 @@ Examples:
```sh
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
```
or same with the newer mips-crosscompiler put out by Loongson that defaults to the 32bit ABI:
```sh
make HOSTCC=gcc CC='/opt/mips-loongson-gcc7.3-linux-gnu/2019.06-29/bin/mips-linux-gnu-gcc -mabi=64' FC='/opt/mips-loongson-gcc7.3-linux-gnu/2019.06-29/bin/mips-linux-gnu-gfortran -mabi=64' TARGET=LOONGSON3A
```
* On an x86 box, compile this library for a loongson3a CPU with loongcc (based on Open64) compiler:
```sh
@ -119,6 +130,11 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
- **AMD ZEN**: Uses Haswell codes with some optimizations.
#### MIPS32
- **MIPS 1004K**: uses P5600 codes
- **MIPS 24K**: uses P5600 codes
#### MIPS64
- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.
@ -139,12 +155,17 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
- **Falkor**: same as A57 (different cpu specifications)
- **ThunderX**: Optimized some Level-1 functions
- **ThunderX2T99**: Optimized Level-3 BLAS and parts of Levels 1 and 2
- **ThunderX3T110**
- **TSV110**: Optimized some Level-3 helper functions
- **EMAG 8180**: preliminary support based on A57
- **Neoverse N1**: (AWS Graviton2) preliminary support
- **Apple Vortex**: preliminary support based on ARMV8
#### PPC/PPC64
- **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1`
- **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only.
- **POWER10**:
#### IBM zEnterprise System
@ -153,13 +174,18 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
### Support for multiple targets in a single library
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying DYNAMIC_ARCH=1 in Makefile.rule, on the gmake command line or as -DDYNAMIC_ARCH=TRUE in cmake.
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX. For cpu generations not included in this list, the corresponding older model is used. If you also specify DYNAMIC_OLDER=1, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option DYNAMIC_LIST that allows to specify an individual list of targets to include instead of the default.
DYNAMIC_ARCH is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias,
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX. For cpu generations not included in this list, the corresponding older model is used. If you also specify `DYNAMIC_OLDER=1`, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option `DYNAMIC_LIST` that allows to specify an individual list of targets to include instead of the default.
`DYNAMIC_ARCH` is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias,
Core2, Penryn, Dunnington, Nehalem, Athlon, Opteron, Opteron_SSE3, Barcelona, Bobcat, Atom and Nano.
On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus.
For **POWER**, the list encompasses POWER6, POWER8 and POWER9, on **ZARCH** it comprises Z13 and Z14.
The TARGET option can be used in conjunction with DYNAMIC_ARCH=1 to specify which cpu model should be assumed for all the
The `TARGET` option can be used in conjunction with `DYNAMIC_ARCH=1` to specify which cpu model should be assumed for all the
common code in the library, usually you will want to set this to the oldest model you expect to encounter.
Please note that it is not possible to combine support for different architectures, so no combined 32 and 64 bit or x86_64 and arm64 in the same library.
@ -207,7 +233,8 @@ We provide the following functions to control the number of threads at runtime:
void goto_set_num_threads(int num_threads);
void openblas_set_num_threads(int num_threads);
```
Note that these are only used once at library initialization, and are not available for
fine-tuning thread numbers in individual BLAS calls.
If you compile this library with `USE_OPENMP=1`, you should use the above functions too.
## Reporting bugs

View File

@ -22,6 +22,7 @@ SANDYBRIDGE
HASWELL
SKYLAKEX
ATOM
COOPERLAKE
b)AMD CPU:
ATHLON
@ -49,6 +50,7 @@ POWER6
POWER7
POWER8
POWER9
POWER10
PPCG4
PPC970
PPC970MP
@ -58,7 +60,8 @@ CELL
3.MIPS CPU:
P5600
1004K
MIPS1004K
MIPS24K
4.MIPS64 CPU:
SICORTEX
@ -88,10 +91,14 @@ CORTEXA53
CORTEXA57
CORTEXA72
CORTEXA73
NEOVERSEN1
EMAG8180
FALKOR
THUNDERX
THUNDERX2T99
TSV110
THUNDERX3T110
VORTEX
9.System Z:
ZARCH_GENERIC

View File

@ -49,3 +49,23 @@ jobs:
# we need a privileged docker run for sde process attachment
docker run --privileged intel_sde
displayName: 'Run AVX512 SkylakeX docker build / test'
- job: Windows_cl
pool:
vmImage: 'windows-latest'
steps:
- task: CMake@1
inputs:
workingDirectory: 'build' # Optional
cmakeArgs: '-G "Visual Studio 16 2019" ..'
- task: CMake@1
inputs:
cmakeArgs: '--build . --config Release'
workingDirectory: 'build'
- script: |
cd build
cd utest
dir
openblas_utest.exe

File diff suppressed because it is too large Load Diff

191
benchmark/amax.c Normal file
View File

@ -0,0 +1,191 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef AMAX
#ifdef COMPLEX
#ifdef DOUBLE
#define AMAX BLASFUNC(dzamax)
#else
#define AMAX BLASFUNC(scamax)
#endif
#else
#ifdef DOUBLE
#define AMAX BLASFUNC(damax)
#else
#define AMAX BLASFUNC(samax)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
AMAX (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

192
benchmark/amin.c Normal file
View File

@ -0,0 +1,192 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef AMIN
#ifdef COMPLEX
#ifdef DOUBLE
#define AMIN BLASFUNC(dzamin)
#else
#define AMIN BLASFUNC(scamin)
#endif
#else
#ifdef DOUBLE
#define AMIN BLASFUNC(damin)
#else
#define AMIN BLASFUNC(samin)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
AMIN (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -128,8 +128,13 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
struct timeval start, stop;
double time1,timeg;
#else
struct timespec start = { 0, 0 }, stop = { 0, 0 };
double time1, timeg;
#endif
argc--;argv++;
@ -147,7 +152,7 @@ int main(int argc, char *argv[]){
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -160,26 +165,30 @@ int main(int argc, char *argv[]){
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
gettimeofday( &start, (struct timezone *)0);
#else
clock_gettime(CLOCK_REALTIME, &start);
#endif
result = ASUM (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
clock_gettime(CLOCK_REALTIME, &stop);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
#else
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
#endif
timeg += time1;
}
if (loops >1)
timeg /= loops;
#ifdef COMPLEX

202
benchmark/axpby.c Normal file
View File

@ -0,0 +1,202 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef AXPBY
#ifdef COMPLEX
#ifdef DOUBLE
#define AXPBY BLASFUNC(zaxpby)
#else
#define AXPBY BLASFUNC(caxpby)
#endif
#else
#ifdef DOUBLE
#define AXPBY BLASFUNC(daxpby)
#else
#define AXPBY BLASFUNC(saxpby)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x, *y;
FLOAT alpha[2] = { 2.0, 2.0 };
FLOAT beta[2] = {2.0, 2.0};
blasint m, i;
blasint inc_x=1,inc_y=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (l=0; l<loops; l++)
{
gettimeofday( &start, (struct timezone *)0);
AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
(COMPSIZE * COMPSIZE * 4. - COMPSIZE) * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -128,7 +128,7 @@ int main(int argc, char *argv[]){
int to = 200;
int step = 1;
struct timeval start, stop;
struct timespec start, stop;
double time1,timeg;
argc--;argv++;
@ -151,7 +151,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -175,13 +175,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
clock_gettime( CLOCK_REALTIME, &start);
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
clock_gettime( CLOCK_REALTIME, &stop);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
timeg += time1;
@ -190,7 +190,7 @@ int main(int argc, char *argv[]){
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
" %10.2f MFlops %10.9f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
}

View File

@ -173,46 +173,46 @@ int main(int argc, char *argv[]){
#ifndef COMPLEX
if (uplos & 1) {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) a[i + j * m] = 0.;
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.;
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
} else {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.;
}
}
#else
if (uplos & 1) {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) {
a[(i + j * m) * 2 + 0] = 0.;
a[(i + j * m) * 2 + 1] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
}
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[(j + j * m) * 2 + 1] = 0.;
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
for(i = j + 1; i < m; i++) {
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
}
} else {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) {
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[(j + j * m) * 2 + 1] = 0.;
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
for(i = j + 1; i < m; i++) {
a[(i + j * m) * 2 + 0] = 0.;
a[(i + j * m) * 2 + 1] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
}
}
}
@ -239,10 +239,13 @@ int main(int argc, char *argv[]){
for (j = 0; j < m; j++) {
for(i = 0; i <= j; i++) {
#ifndef COMPLEX
if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
#else
if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
#endif
}
}
@ -250,10 +253,13 @@ int main(int argc, char *argv[]){
for (j = 0; j < m; j++) {
for(i = j; i < m; i++) {
#ifndef COMPLEX
if (maxerr < fabs(a[i + j * m] - b[i + j * m])) maxerr = fabs(a[i + j * m] - b[i + j * m]);
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
#else
if (maxerr < fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0])) maxerr = fabs(a[(i + j * m) * 2 + 0] - b[(i + j * m) * 2 + 0]);
if (maxerr < fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1])) maxerr = fabs(a[(i + j * m) * 2 + 1] - b[(i + j * m) * 2 + 1]);
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
#endif
}
}

View File

@ -154,7 +154,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -145,7 +145,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -195,7 +195,7 @@ int main(int argc, char *argv[]){
for(j = 0; j < to; j++){
for(i = 0; i < to * COMPSIZE; i++){
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -214,7 +214,7 @@ int main(int argc, char *argv[]){
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -39,6 +39,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef DOUBLE
#define GEMM BLASFUNC(dgemm)
#elif defined(HALF)
#define GEMM BLASFUNC(sbgemm)
#else
#define GEMM BLASFUNC(sgemm)
#endif
@ -120,7 +122,8 @@ static void *huge_malloc(BLASLONG size){
int main(int argc, char *argv[]){
FLOAT *a, *b, *c;
IFLOAT *a, *b;
FLOAT *c;
FLOAT alpha[] = {1.0, 0.0};
FLOAT beta [] = {0.0, 0.0};
char transa = 'N';
@ -184,25 +187,25 @@ int main(int argc, char *argv[]){
k = to;
}
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * m * k * COMPSIZE)) == NULL) {
if (( a = (IFLOAT *)malloc(sizeof(IFLOAT) * m * k * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * k * n * COMPSIZE)) == NULL) {
if (( b = (IFLOAT *)malloc(sizeof(IFLOAT) * k * n * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
for (i = 0; i < m * k * COMPSIZE; i++) {
a[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
}
for (i = 0; i < k * n * COMPSIZE; i++) {
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
}
for (i = 0; i < m * n * COMPSIZE; i++) {
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;

View File

@ -163,7 +163,7 @@ int main(int argc, char *argv[]){
loops = atoi(p);
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -181,9 +181,9 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

View File

@ -181,7 +181,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -197,7 +197,7 @@ int main(int argc, char *argv[]){
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
for(j = 0; j < m; j++){
for(i = 0; i < n * COMPSIZE; i++){
a[j + i * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)j + (long)i * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -234,7 +234,7 @@ int main(int argc, char *argv[]){
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
for(j = 0; j < m; j++){
for(i = 0; i < n * COMPSIZE; i++){
a[j + i * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)j + (long)i * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

View File

@ -165,7 +165,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -182,7 +182,7 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < n * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

View File

@ -165,7 +165,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -177,20 +177,20 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
b[i + j * m * COMPSIZE] = 0.0;
b[(long)i + (long)j * (long)m * COMPSIZE] = 0.0;
}
}
for (j = 0; j < m; ++j) {
for (i = 0; i < m * COMPSIZE; ++i) {
b[i] += a[i + j * m * COMPSIZE];
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
}
}

View File

@ -172,7 +172,7 @@ int main(int argc, char *argv[]){
for(j = 0; j < to; j++){
for(i = 0; i < to * COMPSIZE; i++){
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -188,7 +188,7 @@ int main(int argc, char *argv[]){
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

210
benchmark/hbmv.c Normal file
View File

@ -0,0 +1,210 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HBMV
#ifdef DOUBLE
#define HBMV BLASFUNC(zhbmv)
#else
#define HBMV BLASFUNC(chbmv)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz) {
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size) {
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {0.0, 0.0};
blasint k = 1;
char uplo='L';
blasint m, i, j;
blasint inc_x=1, inc_y=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_K"))) k = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' k = %d Inc_x = %d Inc_y = %d Loops = %d\n",
from, to, step, uplo, k, inc_x, inc_y, loops);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");
exit(1);
}
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");
exit(1);
}
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");
exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step) {
timeg=0;
fprintf(stderr, " %6dx%d : ", (int)m, (int)m);
for(j = 0; j < m; j++) {
for(i = 0; i < m * COMPSIZE; i++) {
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
for (l = 0; l < loops; l++) {
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr, " %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)(2 * k + 1) * (double)m / timeg * 1.e-6);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -151,7 +151,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -164,9 +164,9 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -178,8 +178,6 @@ int main(int argc, char *argv[]){
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);

View File

@ -152,7 +152,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -167,7 +167,7 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

186
benchmark/her.c Normal file
View File

@ -0,0 +1,186 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HER
#ifdef DOUBLE
#define HER BLASFUNC(zher)
#else
#define HER BLASFUNC(cher)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *x;
FLOAT alpha[] = {1.0, 1.0};
blasint incx = 1;
char *p;
char uplo='U';
char trans='N';
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
blasint m, i, j;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
fprintf(stderr, " %6d : ", (int)m);
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
HER (&uplo, &m, alpha, x, &incx, a, &m );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / time1 * 1.e-6);
}
return 0;
}

190
benchmark/her2.c Normal file
View File

@ -0,0 +1,190 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HER2
#ifdef DOUBLE
#define HER2 BLASFUNC(zher2)
#else
#define HER2 BLASFUNC(cher2)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
blasint inc = 1;
char *p;
char uplo='U';
char trans='N';
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
blasint m, i, j;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
fprintf(stderr, " %6d : ", (int)m);
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time1 * 1.e-6);
}
return 0;
}

View File

@ -150,7 +150,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -163,9 +163,9 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -177,8 +177,6 @@ int main(int argc, char *argv[]){
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);

View File

@ -149,7 +149,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -162,8 +162,8 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -175,8 +175,6 @@ int main(int argc, char *argv[]){
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);

207
benchmark/hpmv.c Normal file
View File

@ -0,0 +1,207 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HPMV
#ifdef DOUBLE
#define HPMV BLASFUNC(zhpmv)
#else
#define HPMV BLASFUNC(chpmv)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz) {
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size) {
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {1.0, 1.0};
char uplo='L';
blasint m, i, j;
blasint inc_x=1, inc_y=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");
exit(1);
}
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");
exit(1);
}
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");
exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step) {
timeg=0;
fprintf(stderr, " %6dx%d : ", (int)m, (int)m);
for(j = 0; j < m; j++) {
for(i = 0; i < m * COMPSIZE; i++) {
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
for (l = 0; l < loops; l++) {
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr, " %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -145,7 +145,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -181,7 +181,7 @@ int main(int argc, char *argv[]){
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
" %10.2f MBytes %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}

192
benchmark/iamin.c Normal file
View File

@ -0,0 +1,192 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef IAMIN
#ifdef COMPLEX
#ifdef DOUBLE
#define IAMIN BLASFUNC(izamin)
#else
#define IAMIN BLASFUNC(icamin)
#endif
#else
#ifdef DOUBLE
#define IAMIN BLASFUNC(idamin)
#else
#define IAMIN BLASFUNC(isamin)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
IAMIN (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

186
benchmark/imax.c Normal file
View File

@ -0,0 +1,186 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef IMAX
#ifndef COMPLEX
#ifdef DOUBLE
#define IMAX BLASFUNC(idmax)
#else
#define IMAX BLASFUNC(ismax)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
IMAX (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

186
benchmark/imin.c Normal file
View File

@ -0,0 +1,186 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef IMIN
#ifndef COMPLEX
#ifdef DOUBLE
#define IMIN BLASFUNC(idmin)
#else
#define IMIN BLASFUNC(ismin)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
IMIN (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -174,7 +174,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -186,7 +186,7 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -194,7 +194,7 @@ int main(int argc, char *argv[]){
for (j = 0; j < m; ++j) {
for (i = 0; i < m * COMPSIZE; ++i) {
b[i] += a[i + j * m * COMPSIZE];
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
}
}

185
benchmark/max.c Normal file
View File

@ -0,0 +1,185 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef NAMAX
#ifndef COMPLEX
#ifdef DOUBLE
#define NAMAX BLASFUNC(dmax)
#else
#define NAMAX BLASFUNC(smax)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
NAMAX (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

185
benchmark/min.c Normal file
View File

@ -0,0 +1,185 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef NAMIN
#ifndef COMPLEX
#ifdef DOUBLE
#define NAMIN BLASFUNC(dmin)
#else
#define NAMIN BLASFUNC(smin)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
NAMIN (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -145,7 +145,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -170,46 +170,46 @@ int main(int argc, char *argv[]){
#ifndef COMPLEX
if (uplos & 1) {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) a[i + j * m] = 0.;
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.;
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
} else {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) a[i + j * m] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[j + j * m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[i + j * m] = 0.;
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.;
}
}
#else
if (uplos & 1) {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) {
a[(i + j * m) * 2 + 0] = 0.;
a[(i + j * m) * 2 + 1] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
}
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[(j + j * m) * 2 + 1] = 0.;
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
for(i = j + 1; i < m; i++) {
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[((long)i + (long)j * (long)m) * 2 + 0] = 0;
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
}
} else {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) {
a[(i + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(i + j * m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
a[(j + j * m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[(j + j * m) * 2 + 1] = 0.;
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;
for(i = j + 1; i < m; i++) {
a[(i + j * m) * 2 + 0] = 0.;
a[(i + j * m) * 2 + 1] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
}
}
}

View File

@ -32,9 +32,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#include "common.h"
#undef ROT
#undef DOT
#ifndef COMPLEX
#ifdef DOUBLE
#define ROT BLASFUNC(drot)
@ -42,6 +42,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ROT BLASFUNC(srot)
#endif
#else
#ifdef DOUBLE
#define ROT BLASFUNC(zdrot)
#else
#define ROT BLASFUNC(csrot)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
@ -147,7 +156,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -160,17 +169,16 @@ int main(int argc, char *argv[]){
fprintf(stderr, " %6d : ", (int)m);
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
ROT (&m, x, &inc_x, y, &inc_y, c, s);
@ -179,13 +187,13 @@ int main(int argc, char *argv[]){
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
timeg += time1;
}
}
timeg /= loops;
timeg /= loops;
fprintf(stderr,
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);

210
benchmark/rotm.c Normal file
View File

@ -0,0 +1,210 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef ROTM
#ifdef DOUBLE
#define ROTM BLASFUNC(drotm)
#else
#define ROTM BLASFUNC(srotm)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz)
{
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv) {
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size)
{
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =
shmget(IPC_PRIVATE, (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT | 0600)) < 0) {
printf("Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1) {
printf("Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[])
{
FLOAT *x, *y;
// FLOAT result;
blasint m, i;
blasint inc_x = 1, inc_y = 1;
FLOAT param[5] = {1, 2.0, 3.0, 4.0, 5.0};
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1, timeg;
argc--;
argv++;
if (argc > 0) {
from = atol(*argv);
argc--;
argv++;
}
if (argc > 0) {
to = MAX(atol(*argv), from);
argc--;
argv++;
}
if (argc > 0) {
step = atol(*argv);
argc--;
argv++;
}
if ((p = getenv("OPENBLAS_LOOPS")))
loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY")))
inc_y = atoi(p);
fprintf(
stderr,
"From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n",
from, to, step, inc_x, inc_y, loops);
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) ==
NULL) {
fprintf(stderr, "Out of Memory!!\n");
exit(1);
}
if ((y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) ==
NULL) {
fprintf(stderr, "Out of Memory!!\n");
exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for (m = from; m <= to; m += step) {
timeg = 0;
fprintf(stderr, " %6d : ", (int)m);
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
}
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
y[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
}
for (l = 0; l < loops; l++) {
gettimeofday(&start, (struct timezone *)0);
ROTM(&m, x, &inc_x, y, &inc_y, param);
gettimeofday(&stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) +
(double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr, " %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}

View File

@ -150,7 +150,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

219
benchmark/spmv.c Normal file
View File

@ -0,0 +1,219 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SPMV
#ifndef COMPLEX
#ifdef DOUBLE
#define SPMV BLASFUNC(dspmv)
#else
#define SPMV BLASFUNC(sspmv)
#endif
#else
#ifdef DOUBLE
#define SPMV BLASFUNC(zspmv)
#else
#define SPMV BLASFUNC(cspmv)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {1.0, 1.0};
char uplo='L';
blasint m, i, j;
blasint inc_x=1,inc_y=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6dx%d : ", (int)m,(int)m);
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

198
benchmark/spr.c Executable file
View File

@ -0,0 +1,198 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SPR
#ifdef DOUBLE
#define SPR BLASFUNC(dspr)
#else
#define SPR BLASFUNC(sspr)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a,*c;
FLOAT alpha[] = {1.0, 1.0};
blasint inc_x=1;
int loops = 1;
int l;
char *p;
char uplo='U';
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
blasint m, i, j;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d\n", from, to, step,uplo,inc_x);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops Time\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
SPR (&uplo, &m, alpha, c, &inc_x, a);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

207
benchmark/spr2.c Executable file
View File

@ -0,0 +1,207 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SPR2
#ifdef DOUBLE
#define SPR2 BLASFUNC(dspr2)
#else
#define SPR2 BLASFUNC(sspr2)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a,*b,*c;
FLOAT alpha[] = {1.0, 1.0};
blasint inc_x=1,inc_y=1;
int loops = 1;
int l;
char *p;
char uplo='U';
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
blasint m, i, j;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d Inc_y = %d\n", from, to, step,uplo,inc_x,inc_y);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops Time\n");
for(m = from; m <= to; m += step)
{
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++)
{
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
}
timeg /= loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -151,7 +151,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -162,7 +162,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -175,9 +175,9 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -189,8 +189,6 @@ int main(int argc, char *argv[]){
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);

View File

@ -162,7 +162,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -177,7 +177,7 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

185
benchmark/syr.c Normal file
View File

@ -0,0 +1,185 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SYR
#ifdef DOUBLE
#define SYR BLASFUNC(dsyr)
#else
#define SYR BLASFUNC(ssyr)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x,*a;
FLOAT alpha[] = {1.0, 1.0};
char *p;
char uplo='U';
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
blasint m, i, j;
blasint inc_x= 1;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d\n", from, to, step,uplo,inc_x);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
fprintf(stderr, " %6d : ", (int)m);
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
gettimeofday( &start, (struct timezone *)0);
SYR (&uplo, &m, alpha, x, &inc_x, a, &m );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / time1 * 1.e-6);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

194
benchmark/syr2.c Normal file
View File

@ -0,0 +1,194 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SYR2
#ifdef DOUBLE
#define SYR2 BLASFUNC(dsyr2)
#else
#define SYR2 BLASFUNC(ssyr2)
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x, *y, *a;
FLOAT alpha[] = {1.0, 1.0};
char *p;
char uplo='U';
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
blasint m, i, j;
blasint inc_x= 1;
blasint inc_y= 1;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Inc_x = %d Inc_y = %d\n", from, to, step,uplo,inc_x,inc_y);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
{
fprintf(stderr, " %6d : ", (int)m);
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
gettimeofday( &start, (struct timezone *)0);
SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time1 * 1.e-6);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -162,7 +162,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -175,9 +175,9 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -189,8 +189,6 @@ int main(int argc, char *argv[]){
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);

View File

@ -159,7 +159,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -172,8 +172,8 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -185,8 +185,6 @@ int main(int argc, char *argv[]){
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);

172
benchmark/tpmv.c Normal file
View File

@ -0,0 +1,172 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef TPMV
#ifndef COMPLEX
#ifdef DOUBLE
#define TPMV BLASFUNC(dtpmv)
#else
#define TPMV BLASFUNC(stpmv)
#endif
#else
#ifdef DOUBLE
#define TPMV BLASFUNC(ztpmv)
#else
#define TPMV BLASFUNC(ctpmv)
#endif
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size)
{
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1) {
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[])
{
FLOAT *a, *x;
char *p;
char uplo ='U';
char trans='N';
char diag ='U';
int loops = 1;
int l;
blasint inc_x=1;
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
blasint n, i, j;
int from = 1;
int to = 200;
int step = 1;
struct timespec start = { 0, 0 }, stop = { 0, 0 };
double time1, timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c Diag = %c Loops=%d Inc_x=%d\n", from,
to, step, uplo, trans, diag, loops, inc_x);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(n = from; n <= to; n += step) {
timeg=0;
fprintf(stderr, " %6d : ", (int)n);
for(j = 0; j < n; j++) {
for(i = 0; i < n * COMPSIZE; i++) {
a[(long)i + (long)j * (long)n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
for (i = 0; i < n * COMPSIZE * abs(inc_x); i++) {
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (l = 0; l < loops; l++) {
clock_gettime(CLOCK_REALTIME, &start);
TPMV (&uplo, &trans, &diag, &n, a, x, &inc_x);
clock_gettime(CLOCK_REALTIME, &stop);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
timeg += time1;
}
timeg /= loops;
fprintf(stderr, " %10.2f MFlops %12.9f sec\n",
COMPSIZE * COMPSIZE * 1. * (double)n * (double)n / timeg / 1.e6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

172
benchmark/tpsv.c Normal file
View File

@ -0,0 +1,172 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef TPSV
#ifndef COMPLEX
#ifdef DOUBLE
#define TPSV BLASFUNC(dtpsv)
#else
#define TPSV BLASFUNC(stpsv)
#endif
#else
#ifdef DOUBLE
#define TPSV BLASFUNC(ztpsv)
#else
#define TPSV BLASFUNC(ctpsv)
#endif
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size)
{
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1) {
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[])
{
FLOAT *a, *x;
char *p;
char uplo ='U';
char trans='N';
char diag ='U';
int loops = 1;
int l;
blasint inc_x=1;
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
blasint n, i, j;
int from = 1;
int to = 200;
int step = 1;
struct timespec start = { 0, 0 }, stop = { 0, 0 };
double time1, timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c Diag = %c Loops=%d Inc_x=%d\n", from,
to, step, uplo, trans, diag, loops, inc_x);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(n = from; n <= to; n += step) {
timeg=0;
fprintf(stderr, " %6d : ", (int)n);
for(j = 0; j < n; j++) {
for(i = 0; i < n * COMPSIZE; i++) {
a[(long)i + (long)j * (long)n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
for (i = 0; i < n * COMPSIZE * abs(inc_x); i++) {
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (l = 0; l < loops; l++) {
clock_gettime(CLOCK_REALTIME, &start);
TPSV (&uplo, &trans, &diag, &n, a, x, &inc_x);
clock_gettime(CLOCK_REALTIME, &stop);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
timeg += time1;
}
timeg /= loops;
fprintf(stderr, " %10.2f MFlops %12.9f sec\n",
COMPSIZE * COMPSIZE * 1. * (double)n * (double)n / timeg / 1.e6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -162,7 +162,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -175,8 +175,8 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
@ -188,8 +188,6 @@ int main(int argc, char *argv[]){
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6, time1);

172
benchmark/trmv.c Normal file
View File

@ -0,0 +1,172 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef TRMV
#ifndef COMPLEX
#ifdef DOUBLE
#define TRMV BLASFUNC(dtrmv)
#else
#define TRMV BLASFUNC(strmv)
#endif
#else
#ifdef DOUBLE
#define TRMV BLASFUNC(ztrmv)
#else
#define TRMV BLASFUNC(ctrmv)
#endif
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size)
{
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1) {
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[])
{
FLOAT *a, *x;
char *p;
char uplo ='U';
char trans='N';
char diag ='U';
int loops = 1;
int l;
blasint inc_x=1;
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
blasint n, i, j;
int from = 1;
int to = 200;
int step = 1;
struct timespec start = { 0, 0 }, stop = { 0, 0 };
double time1, timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c Diag = %c Loops=%d Inc_x=%d\n", from,
to, step, uplo, trans, diag, loops, inc_x);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
for(n = from; n <= to; n += step) {
timeg=0;
fprintf(stderr, " %6d : ", (int)n);
for(j = 0; j < n; j++) {
for(i = 0; i < n * COMPSIZE; i++) {
a[(long)i + (long)j * (long)n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
for (i = 0; i < n * COMPSIZE * abs(inc_x); i++) {
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (l = 0; l < loops; l++) {
clock_gettime(CLOCK_REALTIME, &start);
TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x);
clock_gettime(CLOCK_REALTIME, &stop);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
timeg += time1;
}
timeg /= loops;
fprintf(stderr, " %10.2f MFlops %12.9f sec\n",
COMPSIZE * COMPSIZE * 1. * (double)n * (double)n / timeg / 1.e6, timeg);
}
return 0;
}
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

View File

@ -172,7 +172,7 @@ int main(int argc, char *argv[]){
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -191,8 +191,8 @@ int main(int argc, char *argv[]){
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

222
benchmark/trsv.c Normal file
View File

@ -0,0 +1,222 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include <time.h>
#include "common.h"
#undef GEMV
#undef TRSV
#ifndef COMPLEX
#ifdef DOUBLE
#define TRSV BLASFUNC(dtrsv)
#else
#define TRSV BLASFUNC(strsv)
#endif
#else
#ifdef DOUBLE
#define TRSV BLASFUNC(ztrsv)
#else
#define TRSV BLASFUNC(ctrsv)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *a, *x;
blasint n = 0, i, j;
blasint inc_x=1;
int loops = 1;
int l;
char *p;
int from = 1;
int to = 200;
int step = 1;
struct timespec time_start, time_end;
time_t seconds = 0;
double time1,timeg;
long long nanos = 0;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
char uplo ='L';
char transa = 'N';
char diag ='U';
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_TRANSA"))) transa=*p;
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
fprintf(stderr, "From : %3d To : %3d Step = %3d Transa = '%c' Inc_x = %d uplo=%c diag=%c loop = %d\n", from, to, step,transa,inc_x,
uplo,diag,loops);
#ifdef __linux
srandom(getpid());
#endif
fprintf(stderr, " SIZE Flops\n");
fprintf(stderr, "============================================\n");
for(n = from; n <= to; n += step)
{
timeg=0;
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * n * n * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * n * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
for(j = 0; j < n; j++){
for(i = 0; i < n * COMPSIZE; i++){
a[i + j * n * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for(l =0;l< loops;l++){
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_start);
TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x);
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_end);
nanos = time_end.tv_nsec - time_start.tv_nsec;
seconds = time_end.tv_sec - time_start.tv_sec;
time1 = seconds + nanos /1.e9;
timeg += time1;
}
timeg /= loops;
long long muls = n*(n+1)/2.0;
long long adds = (n - 1.0)*n/2.0;
fprintf(stderr, "%10d %10.2f MFlops %10.6f sec\n", n,(muls+adds) / timeg * 1.e-6, timeg);
if(a != NULL){
free(a);
}
if( x != NULL){
free(x);
}
}
return 0;
}

View File

@ -146,7 +146,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif

View File

@ -145,7 +145,7 @@ int main(int argc, char *argv[]){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
#ifdef linux
#ifdef __linux
srandom(getpid());
#endif
@ -170,9 +170,11 @@ int main(int argc, char *argv[]){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
#ifdef RETURN_BY_STACK
DOT (&result , &m, x, &inc_x, y, &inc_y );
#else
result = DOT (&m, x, &inc_x, y, &inc_y );
#endif
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

28
c_check
View File

@ -6,8 +6,9 @@
# Checking cross compile
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
$hostarch = `uname -p` if ($hostos eq "AIX");
$hostarch = "x86_64" if ($hostarch eq "amd64");
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/);
$hostarch = "arm64" if ($hostarch eq "aarch64");
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
$hostarch = "zarch" if ($hostarch eq "s390x");
@ -254,6 +255,28 @@ if (($architecture eq "x86") || ($architecture eq "x86_64")) {
}
}
$c11_atomics = 0;
if ($data =~ /HAVE_C11/) {
eval "use File::Temp qw(tempfile)";
if ($@){
warn "could not load PERL module File::Temp, so could not check compiler compatibility with C11";
$c11_atomics = 0;
} else {
($fh,$tmpf) = tempfile( SUFFIX => '.c' , UNLINK => 1 );
print $tmpf "#include <stdatomic.h>\nint main(void){}\n";
$args = " -c -o $tmpf.o $tmpf";
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null");
system(@cmd) == 0;
if ($? != 0) {
$c11_atomics = 0;
} else {
$c11_atomics = 1;
}
unlink("$tmpf.o");
}
}
$data = `$compiler_name $flags -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
$data =~ /globl\s([_\.]*)(.*)/;
@ -316,6 +339,7 @@ $linker_a = "";
&& ($flags !~ /advapi32/)
&& ($flags !~ /shell32/)
&& ($flags !~ /omp/)
&& ($flags !~ /[0-9]+/)
) {
$linker_l .= $flags . " "
}
@ -356,6 +380,8 @@ print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32;
print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1;
print CONFFILE "#define HAVE_C11\t1\n" if $c11_atomics eq 1;
if ($os eq "LINUX") {

16
cblas.h
View File

@ -25,6 +25,11 @@ char* openblas_get_config(void);
/*Get the CPU corename on runtime.*/
char* openblas_get_corename(void);
#ifdef OPENBLAS_OS_LINUX
/* Sets thread affinity for OpenBLAS threads. `thread_idx` is in [0, openblas_get_num_threads()-1]. */
int openblas_setaffinity(int thread_idx, size_t cpusetsize, cpu_set_t* cpu_set);
#endif
/* Get the parallelization type which is used by OpenBLAS */
int openblas_get_parallel(void);
/* OpenBLAS is compiled for sequential use */
@ -377,6 +382,17 @@ void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
double *c, OPENBLAS_CONST blasint cldc);
/*** BFLOAT16 and INT8 extensions ***/
/* convert float array to BFLOAT16 array by rounding */
void cblas_sbstobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout);
/* convert double array to BFLOAT16 array by rounding */
void cblas_sbdtobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout);
/* convert BFLOAT16 array to float array */
void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, float *out, OPENBLAS_CONST blasint incout);
/* convert BFLOAT16 array to double array */
void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout);
/* dot production of BFLOAT16 input arrays, and output as float */
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
#ifdef __cplusplus
}

View File

@ -1,4 +1,3 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from portion of OpenBLAS/Makefile.system
## Sets various variables based on architecture.
@ -45,11 +44,11 @@ endif ()
if (DYNAMIC_ARCH)
if (ARM64)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
endif ()
if (POWER)
set(DYNAMIC_CORE POWER6 POWER8 POWER9)
set(DYNAMIC_CORE POWER6 POWER8 POWER9 POWER10)
endif ()
if (X86)
@ -76,12 +75,16 @@ if (DYNAMIC_ARCH)
set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN)
endif ()
if (NOT NO_AVX512)
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX)
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX COOPERLAKE)
string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
endif ()
if (DYNAMIC_LIST)
set(DYNAMIC_CORE PRESCOTT ${DYNAMIC_LIST})
endif ()
if (DYNAMIC_LIST)
set(DYNAMIC_CORE PRESCOTT ${DYNAMIC_LIST})
endif ()
endif ()
if (EXISTS ${PROJECT_SOURCE_DIR}/config_kernel.h)
message (FATAL_ERROR "Your build directory contains a file config_kernel.h, probably from a previous compilation with make. This will conflict with the cmake compilation and cause strange compiler errors - please remove the file before trying again")
endif ()
if (NOT DYNAMIC_CORE)

View File

@ -99,7 +99,44 @@ endif ()
if (${CORE} STREQUAL "SKYLAKEX")
if (NOT DYNAMIC_ARCH)
if (NOT NO_AVX512)
set (CCOMMON_OPT = "${CCOMMON_OPT} -march=skylake-avx512")
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
endif ()
endif ()
endif ()
if (${CORE} STREQUAL "COOPERLAKE")
if (NOT DYNAMIC_ARCH)
if (NOT NO_AVX512)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=cooperlake")
else ()
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
endif()
endif ()
endif ()
endif ()
if (NOT DYNAMIC_ARCH)
if (HAVE_AVX2)
set (CCOMMON_OPT "${CCOMMON_OPT} -mavx2")
endif ()
if (HAVE_AVX)
set (CCOMMON_OPT "${CCOMMON_OPT} -mavx")
endif ()
if (HAVE_SSE)
set (CCOMMON_OPT "${CCOMMON_OPT} -msse")
endif ()
if (HAVE_SSE2)
set (CCOMMON_OPT "${CCOMMON_OPT} -msse2")
endif ()
if (HAVE_SSE3)
set (CCOMMON_OPT "${CCOMMON_OPT} -msse3")
endif ()
if (HAVE_SSSE3)
set (CCOMMON_OPT "${CCOMMON_OPT} -mssse3")
endif ()
if (HAVE_SSE4_1)
set (CCOMMON_OPT "${CCOMMON_OPT} -msse4.1")
endif ()
endif()

View File

@ -21,7 +21,15 @@
# NEED2UNDERSCORES
if (NOT NO_LAPACK)
enable_language(Fortran)
include(CheckLanguage)
check_language(Fortran)
if(CMAKE_Fortran_COMPILER)
enable_language(Fortran)
else()
message(STATUS "No Fortran compiler found, can build only BLAS but not LAPACK")
set (NOFORTRAN 1)
set (NO_LAPACK 1)
endif()
else()
include(CMakeForceCompiler)
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)

View File

@ -16,6 +16,7 @@ if (${F_COMPILER} STREQUAL "FLANG")
if (USE_OPENMP)
set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
endif ()
set(FCOMMON_OPT "${FCOMMON_OPT} -Mrecursive -Kieee")
endif ()
if (${F_COMPILER} STREQUAL "G77")
@ -81,6 +82,7 @@ if (${F_COMPILER} STREQUAL "INTEL")
if (INTERFACE64)
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
endif ()
set(FCOMMON_OPT "${FCOMMON_OPT} -recursive")
if (USE_OPENMP)
set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
endif ()
@ -120,6 +122,7 @@ if (${F_COMPILER} STREQUAL "PGI")
else ()
set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7")
endif ()
set(FCOMMON_OPT "${FCOMMON_OPT} -Mrecursive")
if (USE_OPENMP)
set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
endif ()

View File

@ -113,11 +113,33 @@ macro(SetDefaultL1)
set(ZSUMKERNEL zsum.S)
set(QSUMKERNEL sum.S)
set(XSUMKERNEL zsum.S)
if (BUILD_BFLOAT16)
set(SHAMINKERNEL ../arm/amin.c)
set(SHAMAXKERNEL ../arm/amax.c)
set(SHMAXKERNEL ../arm/max.c)
set(SHMINKERNEL ../arm/min.c)
set(ISHAMAXKERNEL ../arm/iamax.c)
set(ISHAMINKERNEL ../arm/iamin.c)
set(ISHMAXKERNEL ../arm/imax.c)
set(ISHMINKERNEL ../arm/imin.c)
set(SHASUMKERNEL ../arm/asum.c)
set(SHAXPYKERNEL ../arm/axpy.c)
set(SHAXPBYKERNEL ../arm/axpby.c)
set(SHCOPYKERNEL ../arm/copy.c)
set(SBDOTKERNEL ../x86_64/sbdot.c)
set(SHROTKERNEL ../arm/rot.c)
set(SHSCALKERNEL ../arm/scal.c)
set(SHNRM2KERNEL ../arm/nrm2.c)
set(SHSUMKERNEL ../arm/sum.c)
set(SHSWAPKERNEL ../arm/swap.c)
set(TOBF16KERNEL ../x86_64/tobf16.c)
set(BF16TOKERNEL ../x86_64/bf16to.c)
endif ()
endmacro ()
macro(SetDefaultL2)
set(SGEMVNKERNEL gemv_n.S)
set(SGEMVTKERNEL gemv_t.S)
set(SGEMVNKERNEL ../arm/gemv_n.c)
set(SGEMVTKERNEL ../arm/gemv_t.c)
set(DGEMVNKERNEL gemv_n.S)
set(DGEMVTKERNEL gemv_t.S)
set(CGEMVNKERNEL zgemv_n.S)
@ -161,6 +183,11 @@ macro(SetDefaultL2)
set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
if (BUILD_BFLOAT16)
set(SBGEMVNKERNEL ../arm/gemv_n.c)
set(SBGEMVTKERNEL ../arm/gemv_t.c)
set(SHGERKERNEL ../generic/ger.c)
endif ()
endmacro ()
macro(SetDefaultL3)
@ -168,4 +195,18 @@ macro(SetDefaultL3)
set(DGEADD_KERNEL ../generic/geadd.c)
set(CGEADD_KERNEL ../generic/zgeadd.c)
set(ZGEADD_KERNEL ../generic/zgeadd.c)
if (BUILD_BFLOAT16)
set(SHGEADD_KERNEL ../generic/geadd.c)
set(SBGEMMKERNEL ../generic/gemmkernel_2x2.c)
set(SBGEMM_BETA ../generic/gemm_beta.c)
set(SBGEMMINCOPY ../generic/gemm_ncopy_2.c)
set(SBGEMMITCOPY ../generic/gemm_tcopy_2.c)
set(SBGEMMONCOPY ../generic/gemm_ncopy_2.c)
set(SBGEMMOTCOPY ../generic/gemm_tcopy_2.c)
set(SBGEMMINCOPYOBJ sbgemm_incopy.o)
set(SBGEMMITCOPYOBJ sbgemm_itcopy.o)
set(SBGEMMONCOPYOBJ sbgemm_oncopy.o)
set(SBGEMMOTCOPYOBJ sbgemm_otcopy.o)
endif ()
endmacro ()

View File

@ -1,11 +1,12 @@
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.
set(ALLAUX ilaenv.f ilaenv2stage.f ieeeck.f lsamen.f iparmq.f iparam2stage.F
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f dlaset.f
../INSTALL/ilaver.f xerbla_array.f
../INSTALL/slamch.f)
set(SCLAUX
scombssq.f sbdsvdx.f sstevx.f sstein.f
sbdsdc.f
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
@ -25,6 +26,7 @@ set(SCLAUX
set(DZLAUX
dbdsdc.f
dbdsvdx.f dstevx.f dstein.f
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
@ -35,14 +37,14 @@ set(DZLAUX
dlartg.f dlaruv.f dlas2.f dlascl.f
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
dsteqr.f dsterf.f dlaisnan.f disnan.f
dlartgp.f dlartgs.f
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f)
set(SLASRC
sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
sgehd2.f sgehrd.f sgelq2.f sgelqf.f
@ -83,8 +85,8 @@ set(SLASRC
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
sstevx.f ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
ssptrf.f ssptri.f ssptrs.f sstegr.f sstev.f sstevd.f sstevr.f
ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
ssyswapr.f ssytrs.f ssytrs2.f
@ -116,7 +118,7 @@ set(SLASRC
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
scombssq.f sgesvdq.f slaorhr_col_getrfnp.f
sgesvdq.f slaorhr_col_getrfnp.f
slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f )
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
@ -229,7 +231,7 @@ set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
cla_lin_berr.f clarscl2.f clascl2.f cla_wwaddw.f)
set(DLASRC
dbdsvdx.f dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
dgehd2.f dgehrd.f dgelq2.f dgelqf.f
@ -270,8 +272,8 @@ set(DLASRC
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
dstevx.f dsycon.f dsyev.f dsyevd.f dsyevr.f
dsptrf.f dsptri.f dsptrs.f dstegr.f dstev.f dstevd.f dstevr.f
dsycon.f dsyev.f dsyevd.f dsyevr.f
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
dsysv.f dsysvx.f
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytrs.f dsytrs2.f
@ -474,12 +476,16 @@ endif()
if(BUILD_COMPLEX)
set(LA_REL_SRC ${LA_REL_SRC} ${CLASRC} ${ZCLASRC} ${ALLAUX} ${SCLAUX})
SET(LA_GEN_SRC ${LA_GEN_SRC} ${CMATGEN} ${SCATGEN})
message(STATUS "Building Complex Precision")
message(STATUS "Building Single Precision Complex")
endif()
if(BUILD_COMPLEX16)
set(LA_REL_SRC ${LA_REL_SRC} ${ZLASRC} ${ZCLASRC} ${ALLAUX} ${DZLAUX})
SET(LA_GEN_SRC ${LA_GEN_SRC} ${ZMATGEN} ${DZATGEN})
message(STATUS "Building Double Complex Precision")
# for zlange/zlanhe
if (NOT BUILD_DOUBLE)
set (LA_REL_SRC ${LA_REL_SRC} dcombssq.f)
endif ()
message(STATUS "Building Double Precision Complex")
endif()
# add lapack-netlib folder to the sources

View File

@ -7,5 +7,5 @@ Name: OpenBLAS
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
Version: @OPENBLAS_VERSION@
URL: https://github.com/xianyi/OpenBLAS
Libs: -L${libdir} -lopenblas${libsuffix}
Libs: @OpenMP_C_FLAGS@ -L${libdir} -lopenblas${libsuffix}
Cflags: -I${includedir}

View File

@ -8,7 +8,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
set(NO_EXPRECISION 1)
endif ()
if (${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD|OpenBSD|NetBSD|DragonFly")
if (${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD|OpenBSD|NetBSD|DragonFly|Darwin")
set(EXTRALIB "${EXTRALIB} -lm")
set(NO_EXPRECISION 1)
endif ()

View File

@ -16,6 +16,8 @@
# HAVE_SSE2
# HAVE_SSE3
# MAKE
# SBGEMM_UNROLL_M
# SBGEMM_UNROLL_N
# SGEMM_UNROLL_M
# SGEMM_UNROLL_N
# DGEMM_UNROLL_M
@ -193,8 +195,13 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
"#define HAVE_VFP\n"
"#define HAVE_NEON\n"
"#define ARMV8\n")
if ("${TCORE}" STREQUAL "CORTEXA57")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 4)
else ()
set(SGEMM_UNROLL_M 8)
set(SGEMM_UNROLL_N 8)
endif ()
set(DGEMM_UNROLL_M 8)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
@ -229,6 +236,33 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "NEOVERSEN1")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_CODE_SIZE\t65536\n"
"#define L1_CODE_LINESIZE\t64\n"
"#define L1_CODE_ASSOCIATIVE\t4\n"
"#define L1_DATA_SIZE\t65536\n"
"#define L1_DATA_LINESIZE\t64\n"
"#define L1_DATA_ASSOCIATIVE\t2\n"
"#define L2_SIZE\t1048576\n\n"
"#define L2_LINESIZE\t64\n"
"#define L2_ASSOCIATIVE\t16\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define HAVE_VFPV4\n"
"#define HAVE_VFPV3\n"
"#define HAVE_VFP\n"
"#define HAVE_NEON\n"
"#define ARMV8\n")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 8)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "FALKOR")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_CODE_SIZE\t65536\n"
@ -309,6 +343,33 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "THUNDERX3T110")
file(APPEND ${TARGET_CONF_TEMP}
"#define THUNDERX3T110\n"
"#define L1_CODE_SIZE\t65536\n"
"#define L1_CODE_LINESIZE\t64\n"
"#define L1_CODE_ASSOCIATIVE\t8\n"
"#define L1_DATA_SIZE\t65536\n"
"#define L1_DATA_LINESIZE\t64\n"
"#define L1_DATA_ASSOCIATIVE\t8\n"
"#define L2_SIZE\t524288\n"
"#define L2_LINESIZE\t64\n"
"#define L2_ASSOCIATIVE\t8\n"
"#define L3_SIZE\t94371840\n"
"#define L3_LINESIZE\t64\n"
"#define L3_ASSOCIATIVE\t32\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define ARMV8\n")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 8)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "TSV110")
file(APPEND ${TARGET_CONF_TEMP}
"#define ARMV8\n"
@ -391,7 +452,7 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
set(ZGEMM_UNROLL_M 8)
set(ZGEMM_UNROLL_N 2)
set(SYMV_P 8)
elseif ("${TCORE}" STREQUAL "POWER9")
elseif ("${TCORE}" STREQUAL "POWER9" OR "${TCORE}" STREQUAL "POWER10")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE 32768\n"
"#define L1_DATA_LINESIZE 128\n"
@ -410,6 +471,8 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
set(ZGEMM_UNROLL_N 2)
set(SYMV_P 8)
endif()
set(SBGEMM_UNROLL_M 8)
set(SBGEMM_UNROLL_N 4)
# Or should this actually be NUM_CORES?
if (${NUM_THREADS} GREATER 0)
@ -461,7 +524,7 @@ else(NOT CMAKE_CROSSCOMPILING)
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
try_compile(GETARCH_RESULT ${GETARCH_DIR}
SOURCES ${GETARCH_SRC}
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${GETARCH_DIR} -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I"${GETARCH_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
OUTPUT_VARIABLE GETARCH_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
)
@ -489,7 +552,7 @@ execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 1 OUTPUT_VARIABLE
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${GETARCH2_DIR} -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I"${GETARCH2_DIR}" -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
OUTPUT_VARIABLE GETARCH2_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
)

View File

@ -33,7 +33,7 @@ endif ()
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
message(STATUS "Compiling a ${BINARY}-bit binary.")
set(NO_AVX 1)
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX")
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX" OR ${TARGET} STREQUAL "COOPERLAKE")
set(TARGET "NEHALEM")
endif ()
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
@ -45,6 +45,18 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
endif ()
if (DEFINED TARGET)
if (${TARGET} STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
else()
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
# endif()
endif()
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
@ -58,6 +70,21 @@ if (DEFINED TARGET)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
endif()
if (DEFINED HAVE_SSE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
endif()
if (DEFINED HAVE_SSE2)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
endif()
if (DEFINED HAVE_SSE3)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
endif()
if (DEFINED HAVE_SSSE3)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
endif()
if (DEFINED HAVE_SSE4_1)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
endif()
endif()
if (DEFINED TARGET)
@ -98,6 +125,11 @@ if (NO_AVX2)
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2")
endif ()
if (NO_AVX512)
message(STATUS "Disabling Advanced Vector Extensions 512 (AVX512).")
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX512")
endif ()
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(GETARCH_FLAGS "${GETARCH_FLAGS} ${CMAKE_C_FLAGS_DEBUG}")
endif ()
@ -297,6 +329,22 @@ if (USE_SIMPLE_THREADED_LEVEL3)
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
endif ()
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
if (DEFINED MAX_STACK_ALLOC)
if (NOT ${MAX_STACK_ALLOC} EQUAL 0)
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_STACK_ALLOC=${MAX_STACK_ALLOC}")
endif ()
else ()
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_STACK_ALLOC=2048")
endif ()
endif ()
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
if (DEFINED BLAS3_MEM_ALLOC_THRESHOLD)
if (NOT ${BLAS3_MEM_ALLOC_THRESHOLD} EQUAL 32)
set(CCOMMON_OPT "${CCOMMON_OPT} -DBLAS3_MEM_ALLOC_THRESHOLD=${BLAS3_MEM_ALLOC_THRESHOLD}")
endif()
endif()
endif()
if (DEFINED LIBNAMESUFFIX)
set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}")
else ()
@ -366,6 +414,25 @@ set(REVISION "-r${OpenBLAS_VERSION}")
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CCOMMON_OPT}")
if (NOT BUILD_SINGLE AND NOT BUILD_DOUBLE AND NOT BUILD_COMPLEX AND NOT BUILD_COMPLEX16)
set (BUILD_SINGLE ON)
set (BUILD_DOUBLE ON)
set (BUILD_COMPLEX ON)
set (BUILD_COMPLEX16 ON)
endif()
if (BUILD_SINGLE)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_SINGLE")
endif()
if (BUILD_DOUBLE)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_DOUBLE")
endif()
if (BUILD_COMPLEX)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX")
endif()
if (BUILD_COMPLEX16)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX16")
endif()
if(NOT MSVC)
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${CCOMMON_OPT}")
endif()
@ -407,6 +474,14 @@ if (${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Windows
set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE")
endif ()
if ("${CMAKE_BUILD_TYPE}" STREQUAL "Release")
if ("${F_COMPILER}" STREQUAL "FLANG")
if (${CMAKE_Fortran_COMPILER_VERSION} VERSION_LESS_EQUAL 3)
set(CMAKE_Fortran_FLAGS_RELEASE "${CMAKE_Fortran_FLAGS_RELEASE} -fno-unroll-loops")
endif ()
endif ()
endif ()
if (NOT DEFINED SUFFIX)
set(SUFFIX o)
endif ()
@ -530,6 +605,8 @@ endif ()
#export FUNCTION_PROFILE
#export TARGET_CORE
#
#export SBGEMM_UNROLL_M
#export SBGEMM_UNROLL_N
#export SGEMM_UNROLL_M
#export SGEMM_UNROLL_N
#export DGEMM_UNROLL_M

View File

@ -54,14 +54,14 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
set(X86 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
set(ARM 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
set(ARM64 1)
else()
set(ARM 1)
endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
set(ARM 1)
elseif (${CMAKE_CROSSCOMPILING})
if (${TARGET} STREQUAL "CORE2")
if (NOT BINARY)
@ -109,10 +109,18 @@ else()
endif()
if (X86_64 OR X86)
file(WRITE ${PROJECT_BINARY_DIR}/avx512.tmp "#include <immintrin.h>\n\nint main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }")
execute_process(COMMAND ${CMAKE_C_COMPILER} -march=skylake-avx512 -c -v -o ${PROJECT_BINARY_DIR}/avx512.o -x c ${PROJECT_BINARY_DIR}/avx512.tmp OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_AVX512)
if (NOT NO_AVX512)
file(WRITE ${PROJECT_BINARY_DIR}/avx512.c "#include <immintrin.h>\n\nint main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }")
execute_process(COMMAND ${CMAKE_C_COMPILER} -march=skylake-avx512 -c -v -o ${PROJECT_BINARY_DIR}/avx512.o ${PROJECT_BINARY_DIR}/avx512.c OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_AVX512)
if (NO_AVX512 EQUAL 1)
set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512")
endif()
file(REMOVE "avx512.tmp" "avx512.o")
file(REMOVE "avx512.c" "avx512.o")
endif()
endif()
include(CheckIncludeFile)
CHECK_INCLUDE_FILE("stdatomic.h" HAVE_C11)
if (HAVE_C11 EQUAL 1)
set (CCOMMON_OPT "${CCOMMON_OPT} -DHAVE_C11")
endif()

View File

@ -15,12 +15,36 @@ endfunction ()
# Reads a Makefile into CMake vars.
macro(ParseMakefileVars MAKEFILE_IN)
message(STATUS "Reading vars from ${MAKEFILE_IN}...")
set (IfElse 0)
set (ElseSeen 0)
file(STRINGS ${MAKEFILE_IN} makefile_contents)
foreach (makefile_line ${makefile_contents})
#message(STATUS "parsing ${makefile_line}")
if (${IfElse} GREATER 0)
string(REGEX MATCH "endif[ \t]*" line_match "${makefile_line}")
if (NOT "${line_match}" STREQUAL "")
# message(STATUS "ENDIF ${makefile_line}")
set (IfElse 0)
set (ElseSeen 0)
continue ()
endif ()
string(REGEX MATCH "else[ \t]*" line_match "${makefile_line}")
if (NOT "${line_match}" STREQUAL "")
# message(STATUS "ELSE ${makefile_line}")
set (ElseSeen 1)
continue ()
endif()
if ( (${IfElse} EQUAL 2 AND ${ElseSeen} EQUAL 0) OR ( ${IfElse} EQUAL 1 AND ${ElseSeen} EQUAL 1))
# message(STATUS "skipping ${makefile_line}")
continue ()
endif ()
endif ()
string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}")
if (NOT "${line_match}" STREQUAL "")
#message(STATUS "match on ${line_match}")
set(var_name ${CMAKE_MATCH_1})
set(var_value ${CMAKE_MATCH_2})
# set(var_value ${CMAKE_MATCH_2})
string(STRIP ${CMAKE_MATCH_2} var_value)
# check for Makefile variables in the string, e.g. $(TSUFFIX)
string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches ${var_value})
foreach (make_var ${make_var_matches})
@ -33,7 +57,31 @@ macro(ParseMakefileVars MAKEFILE_IN)
else ()
string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}")
if (NOT "${line_match}" STREQUAL "")
#message(STATUS "match on include ${line_match}")
ParseMakefileVars(${KERNELDIR}/${CMAKE_MATCH_1})
else ()
# message(STATUS "unmatched line ${line_match}")
string(REGEX MATCH "ifeq \\(\\$\\(([_A-Z]+)\\),[ \t]*([0-9_A-Z]+)\\)" line_match "${makefile_line}")
if (NOT "${line_match}" STREQUAL "")
# message(STATUS "IFEQ: ${line_match} first: ${CMAKE_MATCH_1} second: ${CMAKE_MATCH_2}")
if (DEFINED ${${CMAKE_MATCH_1}} AND ${${CMAKE_MATCH_1}} STREQUAL ${CMAKE_MATCH_2})
# message (STATUS "condition is true")
set (IfElse 1)
else ()
set (IfElse 2)
endif ()
else ()
string(REGEX MATCH "ifneq \\(\\$\\(([_A-Z]+)\\),[ \t]*([0-9_A-Z]+)\\)" line_match "${makefile_line}")
if (NOT "${line_match}" STREQUAL "")
# message(STATUS "IFNEQ: ${line_match} first: ${CMAKE_MATCH_1} second: ${CMAKE_MATCH_2}")
if (NOT ( ${${CMAKE_MATCH_1}} STREQUAL ${CMAKE_MATCH_2}))
# message (STATUS "condition is true")
set (IfElse 1)
else ()
set (IfElse 2)
endif ()
endif ()
endif ()
endif ()
endif ()
endforeach ()
@ -163,6 +211,7 @@ function(GenerateNamedObjects sources_in)
if (complex_only)
list(REMOVE_ITEM float_list "SINGLE")
list(REMOVE_ITEM float_list "DOUBLE")
list(REMOVE_ITEM float_list "BFLOAT16")
elseif (real_only)
list(REMOVE_ITEM float_list "COMPLEX")
list(REMOVE_ITEM float_list "ZCOMPLEX")
@ -176,6 +225,9 @@ function(GenerateNamedObjects sources_in)
if (NOT no_float_type)
string(SUBSTRING ${float_type} 0 1 float_char)
string(TOLOWER ${float_char} float_char)
if (${float_type} STREQUAL "BFLOAT16")
set (float_char "sb")
endif ()
endif ()
if (NOT name_in)
@ -210,6 +262,9 @@ function(GenerateNamedObjects sources_in)
if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX")
list(APPEND obj_defines "DOUBLE")
endif ()
if (${float_type} STREQUAL "BFLOAT16")
list(APPEND obj_defines "BFLOAT16")
endif ()
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
list(APPEND obj_defines "COMPLEX")
if (mangle_complex_sources)

View File

@ -257,6 +257,12 @@ typedef long BLASLONG;
typedef unsigned long BLASULONG;
#endif
#ifndef bfloat16
#include <stdint.h>
typedef uint16_t bfloat16;
#define BFLOAT16CONVERSION 1
#endif
#ifdef USE64BITINT
typedef BLASLONG blasint;
#if defined(OS_WINDOWS) && defined(__64BIT__)
@ -297,6 +303,13 @@ typedef int blasint;
#define SIZE 8
#define BASE_SHIFT 3
#define ZBASE_SHIFT 4
#elif defined(BFLOAT16)
#define IFLOAT bfloat16
#define XFLOAT IFLOAT
#define FLOAT float
#define SIZE 2
#define BASE_SHIFT 1
#define ZBASE_SHIFT 2
#else
#define FLOAT float
#define SIZE 4
@ -308,6 +321,10 @@ typedef int blasint;
#define XFLOAT FLOAT
#endif
#ifndef IFLOAT
#define IFLOAT FLOAT
#endif
#ifndef COMPLEX
#define COMPSIZE 1
#else
@ -335,7 +352,7 @@ typedef int blasint;
#endif
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
#endif
#ifdef BULLDOZER
@ -344,13 +361,8 @@ typedef int blasint;
#endif
#endif
#ifdef POWER8
#ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
#endif
#endif
#ifdef POWER9
#if defined(POWER8) || defined(POWER9) || defined(POWER10)
#ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
#endif
@ -390,7 +402,7 @@ please https://github.com/xianyi/OpenBLAS/issues/246
#endif
#ifndef BLAS3_MEM_ALLOC_THRESHOLD
#define BLAS3_MEM_ALLOC_THRESHOLD 160
#define BLAS3_MEM_ALLOC_THRESHOLD 32
#endif
#ifdef QUAD_PRECISION
@ -662,6 +674,8 @@ void gotoblas_dynamic_init(void);
void gotoblas_dynamic_quit(void);
void gotoblas_profile_init(void);
void gotoblas_profile_quit(void);
int support_avx512(void);
#ifdef USE_OPENMP
@ -673,7 +687,7 @@ __declspec(dllimport) int __cdecl omp_in_parallel(void);
__declspec(dllimport) int __cdecl omp_get_num_procs(void);
#endif
#if (__STDC_VERSION__ >= 201112L)
#ifdef HAVE_C11
#if defined(C_GCC) && ( __GNUC__ < 7)
// workaround for GCC bug 65467
#ifndef _Atomic

View File

@ -43,6 +43,7 @@
#define MB asm("mb")
#define WMB asm("wmb")
#define RMB asm("rmb")
static void __inline blas_lock(unsigned long *address){
#ifndef __DECC

View File

@ -37,11 +37,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define MB
#define WMB
#define RMB
#else
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
#define RMB __asm__ __volatile__ ("dmb ish" : : : "memory")
#endif
@ -121,7 +123,7 @@ REALNAME:
#endif
#define HUGE_PAGESIZE ( 4 << 20)
#define BUFFER_SIZE (16 << 20)
#define BUFFER_SIZE (32 << 20)
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)

View File

@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
#define RMB __asm__ __volatile__ ("dmb ishld" : : : "memory")
#define INLINE inline
@ -53,16 +53,16 @@ static void __inline blas_lock(volatile BLASULONG *address){
BLASULONG ret;
do {
while (*address) {YIELDING;};
__asm__ __volatile__(
"mov x4, #1 \n\t"
"sevl \n\t"
"1: \n\t"
"wfe \n\t"
"2: \n\t"
"ldaxr x2, [%1] \n\t"
"cbnz x2, 1b \n\t"
"2: \n\t"
"stxr w3, x4, [%1] \n\t"
"cbnz w3, 1b \n\t"
"cbnz w3, 2b \n\t"
"mov %0, #0 \n\t"
: "=r"(ret), "=r"(address)
: "1"(address)
@ -81,10 +81,12 @@ static void __inline blas_lock(volatile BLASULONG *address){
#if !defined(OS_DARWIN) && !defined (OS_ANDROID)
static __inline BLASULONG rpcc(void){
BLASULONG ret = 0;
blasint shift;
__asm__ __volatile__ ("isb; mrs %0,cntvct_el0":"=r"(ret));
__asm__ __volatile__ ("mrs %0,cntfrq_el0; clz %w0, %w0":"=&r"(shift));
return ret;
return ret << shift;
}
#define RPCC_DEFINED
@ -139,12 +141,17 @@ REALNAME:
#endif
#define HUGE_PAGESIZE ( 4 << 20)
#ifndef BUFFERSIZE
#if defined(CORTEXA57)
#define BUFFER_SIZE (20 << 20)
#elif defined(TSV110) || defined(EMAG8180)
#define BUFFER_SIZE (32 << 20)
#else
#define BUFFER_SIZE (16 << 20)
#endif
#else
#define BUFFER_SIZE (32 << BUFFERSIZE)
#endif
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)

View File

@ -47,6 +47,7 @@
#define MB
#define WMB
#define RMB
#ifdef __ECC
#include <ia64intrin.h>

View File

@ -54,6 +54,11 @@ double BLASFUNC(dsdot) (blasint *, float *, blasint *, float *, blasint *);
double BLASFUNC(ddot) (blasint *, double *, blasint *, double *, blasint *);
xdouble BLASFUNC(qdot) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
float BLASFUNC(sbdot) (blasint *, bfloat16 *, blasint *, bfloat16 *, blasint *);
void BLASFUNC(sbstobf16) (blasint *, float *, blasint *, bfloat16 *, blasint *);
void BLASFUNC(sbdtobf16) (blasint *, double *, blasint *, bfloat16 *, blasint *);
void BLASFUNC(sbf16tos) (blasint *, bfloat16 *, blasint *, float *, blasint *);
void BLASFUNC(dbf16tod) (blasint *, bfloat16 *, blasint *, double *, blasint *);
#ifdef RETURN_BY_STRUCT
typedef struct {
@ -469,6 +474,8 @@ void BLASFUNC(xhbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint
/* Level 3 routines */
void BLASFUNC(sbgemm)(char *, char *, blasint *, blasint *, blasint *, float *,
bfloat16 *, blasint *, bfloat16 *, blasint *, float *, float *, blasint *);
void BLASFUNC(sgemm)(char *, char *, blasint *, blasint *, blasint *, float *,
float *, blasint *, float *, blasint *, float *, float *, blasint *);
void BLASFUNC(dgemm)(char *, char *, blasint *, blasint *, blasint *, double *,

View File

@ -46,6 +46,12 @@ float sdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
double dsdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
float sbdot_k(BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
void sbstobf16_k(BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG);
void sbdtobf16_k(BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG);
void sbf16tos_k (BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
void dbf16tod_k (BLASLONG, bfloat16 *, BLASLONG, double *, BLASLONG);
openblas_complex_float cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);

View File

@ -47,14 +47,16 @@ __global__ void cuda_dgemm_kernel(int, int, int, double *, double *, double *);
extern "C" {
#endif
extern void sgemm_kernel_direct(BLASLONG M, BLASLONG N, BLASLONG K,
void sgemm_direct(BLASLONG M, BLASLONG N, BLASLONG K,
float * A, BLASLONG strideA,
float * B, BLASLONG strideB,
float * R, BLASLONG strideR);
extern int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
int sgemm_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
int sbgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int dgemm_beta(BLASLONG, BLASLONG, BLASLONG, double,
@ -76,6 +78,10 @@ int xgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble *,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
#endif
int sbgemm_incopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
int sbgemm_itcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
int sbgemm_oncopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
int sbgemm_otcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
int sgemm_incopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
int sgemm_itcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
int sgemm_oncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
@ -499,6 +505,7 @@ int xher2k_kernel_UC(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdoubl
int xher2k_kernel_LN(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
int xher2k_kernel_LC(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
int sbgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
int sgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG);
int dgemm_kernel(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
@ -527,6 +534,11 @@ int cgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float
int zgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
int xgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
int sbgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
int sbgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
int sbgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
int sbgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
int sgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
int sgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
int sgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
@ -619,6 +631,11 @@ int xgemm_cr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLON
int xgemm_cc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
#endif
int sbgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
int sbgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
int sbgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
int sbgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
int sgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
int sgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
int sgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);

View File

@ -39,6 +39,7 @@
#ifndef COMMON_MACRO
#define COMMON_MACRO
#include "common_sb.h"
#include "common_s.h"
#include "common_d.h"
#include "common_q.h"
@ -642,6 +643,294 @@
#define IMATCOPY_K_RT DIMATCOPY_K_RT
#define GEADD_K DGEADD_K
#elif defined(BFLOAT16)
#define D_TO_BF16_K SBDTOBF16_K
#define D_BF16_TO_K DBF16TOD_K
#define S_TO_BF16_K SBSTOBF16_K
#define S_BF16_TO_K SBF16TOS_K
#define AMAX_K SAMAX_K
#define AMIN_K SAMIN_K
#define MAX_K SMAX_K
#define MIN_K SMIN_K
#define IAMAX_K ISAMAX_K
#define IAMIN_K ISAMIN_K
#define IMAX_K ISMAX_K
#define IMIN_K ISMIN_K
#define ASUM_K SASUM_K
#define DOTU_K SDOTU_K
#define DOTC_K SDOTC_K
#define BF16_DOT_K SBDOT_K
#define AXPYU_K SAXPYU_K
#define AXPYC_K SAXPYC_K
#define AXPBY_K SAXPBY_K
#define SCAL_K SSCAL_K
#define GEMV_N SGEMV_N
#define GEMV_T SGEMV_T
#define SYMV_U SSYMV_U
#define SYMV_L SSYMV_L
#define GERU_K SGERU_K
#define GERC_K SGERC_K
#define GERV_K SGERV_K
#define GERD_K SGERD_K
#define SUM_K SSUM_K
#define SWAP_K SSWAP_K
#define ROT_K SROT_K
#define COPY_K SCOPY_K
#define NRM2_K SNRM2_K
#define SYMV_THREAD_U SSYMV_THREAD_U
#define SYMV_THREAD_L SSYMV_THREAD_L
#define GEMM_BETA SBGEMM_BETA
#define GEMM_KERNEL_N SBGEMM_KERNEL
#define GEMM_KERNEL_L SBGEMM_KERNEL
#define GEMM_KERNEL_R SBGEMM_KERNEL
#define GEMM_KERNEL_B SBGEMM_KERNEL
#define GEMM_NN SBGEMM_NN
#define GEMM_CN SBGEMM_TN
#define GEMM_TN SBGEMM_TN
#define GEMM_NC SBGEMM_NT
#define GEMM_NT SBGEMM_NT
#define GEMM_CC SBGEMM_TT
#define GEMM_CT SBGEMM_TT
#define GEMM_TC SBGEMM_TT
#define GEMM_TT SBGEMM_TT
#define GEMM_NR SBGEMM_NN
#define GEMM_TR SBGEMM_TN
#define GEMM_CR SBGEMM_TN
#define GEMM_RN SBGEMM_NN
#define GEMM_RT SBGEMM_NT
#define GEMM_RC SBGEMM_NT
#define GEMM_RR SBGEMM_NN
#define GEMM_ONCOPY SBGEMM_ONCOPY
#define GEMM_OTCOPY SBGEMM_OTCOPY
#define GEMM_INCOPY SBGEMM_INCOPY
#define GEMM_ITCOPY SBGEMM_ITCOPY
#define SYMM_THREAD_LU SSYMM_THREAD_LU
#define SYMM_THREAD_LL SSYMM_THREAD_LL
#define SYMM_THREAD_RU SSYMM_THREAD_RU
#define SYMM_THREAD_RL SSYMM_THREAD_RL
#define SYMM_LU SSYMM_LU
#define SYMM_LL SSYMM_LL
#define SYMM_RU SSYMM_RU
#define SYMM_RL SSYMM_RL
#define HEMM_THREAD_LU SHEMM_THREAD_LU
#define HEMM_THREAD_LL SHEMM_THREAD_LL
#define HEMM_THREAD_RU SHEMM_THREAD_RU
#define HEMM_THREAD_RL SHEMM_THREAD_RL
#define GEMM_THREAD_NN SBGEMM_THREAD_NN
#define GEMM_THREAD_CN SBGEMM_THREAD_TN
#define GEMM_THREAD_TN SBGEMM_THREAD_TN
#define GEMM_THREAD_NC SBGEMM_THREAD_NT
#define GEMM_THREAD_NT SBGEMM_THREAD_NT
#define GEMM_THREAD_CC SBGEMM_THREAD_TT
#define GEMM_THREAD_CT SBGEMM_THREAD_TT
#define GEMM_THREAD_TC SBGEMM_THREAD_TT
#define GEMM_THREAD_TT SBGEMM_THREAD_TT
#define GEMM_THREAD_NR SBGEMM_THREAD_NN
#define GEMM_THREAD_TR SBGEMM_THREAD_TN
#define GEMM_THREAD_CR SBGEMM_THREAD_TN
#define GEMM_THREAD_RN SBGEMM_THREAD_NN
#define GEMM_THREAD_RT SBGEMM_THREAD_NT
#define GEMM_THREAD_RC SBGEMM_THREAD_NT
#define GEMM_THREAD_RR SBGEMM_THREAD_NN
#ifdef UNIT
#define TRMM_OUNCOPY STRMM_OUNUCOPY
#define TRMM_OUTCOPY STRMM_OUTUCOPY
#define TRMM_OLNCOPY STRMM_OLNUCOPY
#define TRMM_OLTCOPY STRMM_OLTUCOPY
#define TRSM_OUNCOPY STRSM_OUNUCOPY
#define TRSM_OUTCOPY STRSM_OUTUCOPY
#define TRSM_OLNCOPY STRSM_OLNUCOPY
#define TRSM_OLTCOPY STRSM_OLTUCOPY
#define TRMM_IUNCOPY STRMM_IUNUCOPY
#define TRMM_IUTCOPY STRMM_IUTUCOPY
#define TRMM_ILNCOPY STRMM_ILNUCOPY
#define TRMM_ILTCOPY STRMM_ILTUCOPY
#define TRSM_IUNCOPY STRSM_IUNUCOPY
#define TRSM_IUTCOPY STRSM_IUTUCOPY
#define TRSM_ILNCOPY STRSM_ILNUCOPY
#define TRSM_ILTCOPY STRSM_ILTUCOPY
#else
#define TRMM_OUNCOPY STRMM_OUNNCOPY
#define TRMM_OUTCOPY STRMM_OUTNCOPY
#define TRMM_OLNCOPY STRMM_OLNNCOPY
#define TRMM_OLTCOPY STRMM_OLTNCOPY
#define TRSM_OUNCOPY STRSM_OUNNCOPY
#define TRSM_OUTCOPY STRSM_OUTNCOPY
#define TRSM_OLNCOPY STRSM_OLNNCOPY
#define TRSM_OLTCOPY STRSM_OLTNCOPY
#define TRMM_IUNCOPY STRMM_IUNNCOPY
#define TRMM_IUTCOPY STRMM_IUTNCOPY
#define TRMM_ILNCOPY STRMM_ILNNCOPY
#define TRMM_ILTCOPY STRMM_ILTNCOPY
#define TRSM_IUNCOPY STRSM_IUNNCOPY
#define TRSM_IUTCOPY STRSM_IUTNCOPY
#define TRSM_ILNCOPY STRSM_ILNNCOPY
#define TRSM_ILTCOPY STRSM_ILTNCOPY
#define TRMM_KERNEL_LN STRMM_KERNEL_LN
#define TRMM_KERNEL_LT STRMM_KERNEL_LT
#define TRMM_KERNEL_LR STRMM_KERNEL_LN
#define TRMM_KERNEL_LC STRMM_KERNEL_LT
#define TRMM_KERNEL_RN STRMM_KERNEL_RN
#define TRMM_KERNEL_RT STRMM_KERNEL_RT
#define TRMM_KERNEL_RR STRMM_KERNEL_RN
#define TRMM_KERNEL_RC STRMM_KERNEL_RT
#define TRSM_KERNEL_LN STRSM_KERNEL_LN
#define TRSM_KERNEL_LT STRSM_KERNEL_LT
#define TRSM_KERNEL_LR STRSM_KERNEL_LN
#define TRSM_KERNEL_LC STRSM_KERNEL_LT
#define TRSM_KERNEL_RN STRSM_KERNEL_RN
#define TRSM_KERNEL_RT STRSM_KERNEL_RT
#define TRSM_KERNEL_RR STRSM_KERNEL_RN
#define TRSM_KERNEL_RC STRSM_KERNEL_RT
#define SYMM_IUTCOPY SSYMM_IUTCOPY
#define SYMM_ILTCOPY SSYMM_ILTCOPY
#define SYMM_OUTCOPY SSYMM_OUTCOPY
#define SYMM_OLTCOPY SSYMM_OLTCOPY
#define TRMM_LNUU STRMM_LNUU
#define TRMM_LNUN STRMM_LNUN
#define TRMM_LNLU STRMM_LNLU
#define TRMM_LNLN STRMM_LNLN
#define TRMM_LTUU STRMM_LTUU
#define TRMM_LTUN STRMM_LTUN
#define TRMM_LTLU STRMM_LTLU
#define TRMM_LTLN STRMM_LTLN
#define TRMM_LRUU STRMM_LNUU
#define TRMM_LRUN STRMM_LNUN
#define TRMM_LRLU STRMM_LNLU
#define TRMM_LRLN STRMM_LNLN
#define TRMM_LCUU STRMM_LTUU
#define TRMM_LCUN STRMM_LTUN
#define TRMM_LCLU STRMM_LTLU
#define TRMM_LCLN STRMM_LTLN
#define TRMM_RNUU STRMM_RNUU
#define TRMM_RNUN STRMM_RNUN
#define TRMM_RNLU STRMM_RNLU
#define TRMM_RNLN STRMM_RNLN
#define TRMM_RTUU STRMM_RTUU
#define TRMM_RTUN STRMM_RTUN
#define TRMM_RTLU STRMM_RTLU
#define TRMM_RTLN STRMM_RTLN
#define TRMM_RRUU STRMM_RNUU
#define TRMM_RRUN STRMM_RNUN
#define TRMM_RRLU STRMM_RNLU
#define TRMM_RRLN STRMM_RNLN
#define TRMM_RCUU STRMM_RTUU
#define TRMM_RCUN STRMM_RTUN
#define TRMM_RCLU STRMM_RTLU
#define TRMM_RCLN STRMM_RTLN
#define TRSM_LNUU STRSM_LNUU
#define TRSM_LNUN STRSM_LNUN
#define TRSM_LNLU STRSM_LNLU
#define TRSM_LNLN STRSM_LNLN
#define TRSM_LTUU STRSM_LTUU
#define TRSM_LTUN STRSM_LTUN
#define TRSM_LTLU STRSM_LTLU
#define TRSM_LTLN STRSM_LTLN
#define TRSM_LRUU STRSM_LNUU
#define TRSM_LRUN STRSM_LNUN
#define TRSM_LRLU STRSM_LNLU
#define TRSM_LRLN STRSM_LNLN
#define TRSM_LCUU STRSM_LTUU
#define TRSM_LCUN STRSM_LTUN
#define TRSM_LCLU STRSM_LTLU
#define TRSM_LCLN STRSM_LTLN
#define TRSM_RNUU STRSM_RNUU
#define TRSM_RNUN STRSM_RNUN
#define TRSM_RNLU STRSM_RNLU
#define TRSM_RNLN STRSM_RNLN
#define TRSM_RTUU STRSM_RTUU
#define TRSM_RTUN STRSM_RTUN
#define TRSM_RTLU STRSM_RTLU
#define TRSM_RTLN STRSM_RTLN
#define TRSM_RRUU STRSM_RNUU
#define TRSM_RRUN STRSM_RNUN
#define TRSM_RRLU STRSM_RNLU
#define TRSM_RRLN STRSM_RNLN
#define TRSM_RCUU STRSM_RTUU
#define TRSM_RCUN STRSM_RTUN
#define TRSM_RCLU STRSM_RTLU
#define TRSM_RCLN STRSM_RTLN
#define SYRK_UN SSYRK_UN
#define SYRK_UT SSYRK_UT
#define SYRK_LN SSYRK_LN
#define SYRK_LT SSYRK_LT
#define SYRK_UR SSYRK_UN
#define SYRK_UC SSYRK_UT
#define SYRK_LR SSYRK_LN
#define SYRK_LC SSYRK_LT
#define SYRK_KERNEL_U SSYRK_KERNEL_U
#define SYRK_KERNEL_L SSYRK_KERNEL_L
#define HERK_UN SSYRK_UN
#define HERK_LN SSYRK_LN
#define HERK_UC SSYRK_UT
#define HERK_LC SSYRK_LT
#define HER2K_UN SSYR2K_UN
#define HER2K_LN SSYR2K_LN
#define HER2K_UC SSYR2K_UT
#define HER2K_LC SSYR2K_LT
#define SYR2K_UN SSYR2K_UN
#define SYR2K_UT SSYR2K_UT
#define SYR2K_LN SSYR2K_LN
#define SYR2K_LT SSYR2K_LT
#define SYR2K_UR SSYR2K_UN
#define SYR2K_UC SSYR2K_UT
#define SYR2K_LR SSYR2K_LN
#define SYR2K_LC SSYR2K_LT
#define SYR2K_KERNEL_U SSYR2K_KERNEL_U
#define SYR2K_KERNEL_L SSYR2K_KERNEL_L
#define SYRK_THREAD_UN SSYRK_THREAD_UN
#define SYRK_THREAD_UT SSYRK_THREAD_UT
#define SYRK_THREAD_LN SSYRK_THREAD_LN
#define SYRK_THREAD_LT SSYRK_THREAD_LT
#define SYRK_THREAD_UR SSYRK_THREAD_UR
#define SYRK_THREAD_UC SSYRK_THREAD_UC
#define SYRK_THREAD_LR SSYRK_THREAD_LN
#define SYRK_THREAD_LC SSYRK_THREAD_LT
#define HERK_THREAD_UN SSYRK_THREAD_UN
#define HERK_THREAD_UT SSYRK_THREAD_UT
#define HERK_THREAD_LN SSYRK_THREAD_LN
#define HERK_THREAD_LT SSYRK_THREAD_LT
#define HERK_THREAD_UR SSYRK_THREAD_UR
#define HERK_THREAD_UC SSYRK_THREAD_UC
#define HERK_THREAD_LR SSYRK_THREAD_LN
#define HERK_THREAD_LC SSYRK_THREAD_LT
#define OMATCOPY_K_CN SOMATCOPY_K_CN
#define OMATCOPY_K_RN SOMATCOPY_K_RN
#define OMATCOPY_K_CT SOMATCOPY_K_CT
#define OMATCOPY_K_RT SOMATCOPY_K_RT
#define IMATCOPY_K_CN SIMATCOPY_K_CN
#define IMATCOPY_K_RN SIMATCOPY_K_RN
#define IMATCOPY_K_CT SIMATCOPY_K_CT
#define IMATCOPY_K_RT SIMATCOPY_K_RT
#define GEADD_K SGEADD_K
#endif
#else
#define AMAX_K SAMAX_K
@ -673,14 +962,14 @@
#define GEMV_S SGEMV_S
#define GEMV_D SGEMV_D
#define SYMV_U SSYMV_U
#define SYMV_L SSYMV_L
#define GERU_K SGERU_K
#define GERC_K SGERC_K
#define GERV_K SGERV_K
#define GERD_K SGERD_K
#define SYMV_U SSYMV_U
#define SYMV_L SSYMV_L
#define SYMV_THREAD_U SSYMV_THREAD_U
#define SYMV_THREAD_L SSYMV_THREAD_L
@ -2202,6 +2491,9 @@
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)
extern BLASLONG gemm_offset_a;
extern BLASLONG gemm_offset_b;
extern BLASLONG sbgemm_p;
extern BLASLONG sbgemm_q;
extern BLASLONG sbgemm_r;
extern BLASLONG sgemm_p;
extern BLASLONG sgemm_q;
extern BLASLONG sgemm_r;

Some files were not shown because too many files have changed in this diff Show More