Compare commits
1 Commits
v0.3.7
...
optimized_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
92058a75e2 |
143
.drone.yml
143
.drone.yml
@@ -1,143 +0,0 @@
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_gcc_make
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:19.04
|
||||
environment:
|
||||
CC: gcc
|
||||
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran perl
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm32_gcc_make
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:19.04
|
||||
environment:
|
||||
CC: gcc
|
||||
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV6 NUM_THREADS=32'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran perl
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_clang_make
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: clang
|
||||
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran perl
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm32_clang_cmake
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: clang
|
||||
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV6 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON'
|
||||
commands:
|
||||
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC g++ perl cmake
|
||||
- $CC --version
|
||||
- mkdir build && cd build
|
||||
- cmake $CMAKE_FLAGS ..
|
||||
- make -j
|
||||
- ctest
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_gcc_cmake
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: gcc
|
||||
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV8 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON'
|
||||
commands:
|
||||
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC g++ perl cmake
|
||||
- $CC --version
|
||||
- mkdir build && cd build
|
||||
- cmake $CMAKE_FLAGS ..
|
||||
- make -j
|
||||
- ctest
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_clang_cmake
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:18.04
|
||||
environment:
|
||||
CC: clang
|
||||
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV8 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON'
|
||||
commands:
|
||||
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC g++ perl cmake
|
||||
- $CC --version
|
||||
- mkdir build && cd build
|
||||
- cmake $CMAKE_FLAGS ..
|
||||
- make -j
|
||||
- ctest
|
||||
19
.gitignore
vendored
19
.gitignore
vendored
@@ -5,7 +5,6 @@
|
||||
*.def
|
||||
*.o
|
||||
*.out
|
||||
*.tmp
|
||||
lapack-3.1.1
|
||||
lapack-3.1.1.tgz
|
||||
lapack-3.4.1
|
||||
@@ -15,21 +14,6 @@ lapack-3.4.2.tgz
|
||||
lapack-netlib/make.inc
|
||||
lapack-netlib/lapacke/include/lapacke_mangling.h
|
||||
lapack-netlib/TESTING/testing_results.txt
|
||||
lapack-netlib/INSTALL/test*
|
||||
lapack-netlib/TESTING/xeigtstc
|
||||
lapack-netlib/TESTING/xeigtstd
|
||||
lapack-netlib/TESTING/xeigtsts
|
||||
lapack-netlib/TESTING/xeigtstz
|
||||
lapack-netlib/TESTING/xlintstc
|
||||
lapack-netlib/TESTING/xlintstd
|
||||
lapack-netlib/TESTING/xlintstds
|
||||
lapack-netlib/TESTING/xlintstrfc
|
||||
lapack-netlib/TESTING/xlintstrfd
|
||||
lapack-netlib/TESTING/xlintstrfs
|
||||
lapack-netlib/TESTING/xlintstrfz
|
||||
lapack-netlib/TESTING/xlintsts
|
||||
lapack-netlib/TESTING/xlintstz
|
||||
lapack-netlib/TESTING/xlintstzc
|
||||
*.so
|
||||
*.so.*
|
||||
*.a
|
||||
@@ -85,6 +69,3 @@ test/zblat3
|
||||
build
|
||||
build.*
|
||||
*.swp
|
||||
benchmark/*.goto
|
||||
benchmark/smallscaling
|
||||
|
||||
|
||||
203
.travis.yml
203
.travis.yml
@@ -1,184 +1,5 @@
|
||||
# XXX: Precise is already deprecated, new default is Trusty.
|
||||
# https://blog.travis-ci.com/2017-07-11-trusty-as-default-linux-is-coming
|
||||
dist: precise
|
||||
sudo: true
|
||||
language: c
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- &test-ubuntu
|
||||
os: linux
|
||||
compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gfortran
|
||||
before_script: &common-before
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
|
||||
script:
|
||||
- set -e
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
os: linux-ppc64le
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=PPC64LE_LINUX
|
||||
- BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64 INTERFACE64=1"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
compiler: clang
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64 CC=clang"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
compiler: clang
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64 INTERFACE64=1 CC=clang"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gcc-multilib
|
||||
- gfortran-multilib
|
||||
env:
|
||||
- TARGET_BOX=LINUX32
|
||||
- BTYPE="BINARY=32"
|
||||
|
||||
- os: linux
|
||||
compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- binutils-mingw-w64-x86-64
|
||||
- gcc-mingw-w64-x86-64
|
||||
- gfortran-mingw-w64-x86-64
|
||||
before_script: *common-before
|
||||
script:
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- TARGET_BOX=WIN64
|
||||
- BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
|
||||
|
||||
# Build & test on Alpine Linux inside chroot, i.e. on system with musl libc.
|
||||
# These jobs need sudo, so Travis runs them on VM-based infrastructure
|
||||
# which is slower than container-based infrastructure used for jobs
|
||||
# that don't require sudo.
|
||||
- &test-alpine
|
||||
os: linux
|
||||
dist: trusty
|
||||
sudo: true
|
||||
language: minimal
|
||||
before_install:
|
||||
- "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.9.0/alpine-chroot-install' \
|
||||
&& echo 'e5dfbbdc0c4b3363b99334510976c86bfa6cb251 alpine-chroot-install' | sha1sum -c || exit 1"
|
||||
- alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
|
||||
install:
|
||||
- sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
|
||||
before_script: *common-before
|
||||
script:
|
||||
- set -e
|
||||
# XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
|
||||
- alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
|
||||
- alpine make -C test $COMMON_FLAGS $BTYPE
|
||||
- alpine make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- alpine make -C utest $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- TARGET_BOX=LINUX64_MUSL
|
||||
- BTYPE="BINARY=64"
|
||||
|
||||
# XXX: This job segfaults in TESTS OF THE COMPLEX LEVEL 3 BLAS,
|
||||
# but only on Travis CI, cannot reproduce it elsewhere.
|
||||
#- &test-alpine-openmp
|
||||
# <<: *test-alpine
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64_MUSL
|
||||
# - BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
|
||||
- <<: *test-alpine
|
||||
env:
|
||||
- TARGET_BOX=LINUX64_MUSL
|
||||
- BTYPE="BINARY=64 INTERFACE64=1"
|
||||
|
||||
# Build with the same flags as Alpine does in its OpenBLAS package.
|
||||
- <<: *test-alpine
|
||||
env:
|
||||
- TARGET_BOX=LINUX64_MUSL
|
||||
- BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=CORE2"
|
||||
|
||||
- &test-cmake
|
||||
os: linux
|
||||
compiler: clang
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gfortran
|
||||
- cmake
|
||||
dist: trusty
|
||||
sudo: true
|
||||
before_script:
|
||||
- COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32"
|
||||
script:
|
||||
- set -e
|
||||
- mkdir build
|
||||
- CONFIG=Release
|
||||
- cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG
|
||||
- cmake --build build --config $CONFIG -- -j2
|
||||
env:
|
||||
- CMAKE=1
|
||||
- <<: *test-cmake
|
||||
env:
|
||||
- CMAKE=1 CMAKE_ARGS="-DNOFORTRAN=1"
|
||||
- <<: *test-cmake
|
||||
compiler: gcc
|
||||
env:
|
||||
- CMAKE=1
|
||||
|
||||
- &test-macos
|
||||
os: osx
|
||||
osx_image: xcode10.1
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
|
||||
- brew update
|
||||
- brew install gcc # for gfortran
|
||||
script:
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- BTYPE="BINARY=64 INTERFACE64=1"
|
||||
|
||||
- <<: *test-macos
|
||||
osx_image: xcode8.3
|
||||
env:
|
||||
- BTYPE="BINARY=32"
|
||||
|
||||
# whitelist
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- develop
|
||||
|
||||
notifications:
|
||||
webhooks:
|
||||
urls:
|
||||
@@ -186,3 +7,27 @@ notifications:
|
||||
on_success: change # options: [always|never|change] default: always
|
||||
on_failure: always # options: [always|never|change] default: always
|
||||
on_start: never # options: [always|never|change] default: always
|
||||
|
||||
compiler:
|
||||
- gcc
|
||||
|
||||
env:
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64"
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 INTERFACE64=1"
|
||||
- TARGET_BOX=LINUX32 BTYPE="BINARY=32"
|
||||
- TARGET_BOX=WIN64 BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
|
||||
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq gfortran
|
||||
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
|
||||
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
|
||||
|
||||
script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
|
||||
|
||||
# whitelist
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- develop
|
||||
368
CMakeLists.txt
368
CMakeLists.txt
@@ -2,71 +2,51 @@
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
##
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.5)
|
||||
project(OpenBLAS C ASM)
|
||||
cmake_minimum_required(VERSION 2.8.4)
|
||||
project(OpenBLAS)
|
||||
set(OpenBLAS_MAJOR_VERSION 0)
|
||||
set(OpenBLAS_MINOR_VERSION 3)
|
||||
set(OpenBLAS_PATCH_VERSION 7)
|
||||
set(OpenBLAS_MINOR_VERSION 2)
|
||||
set(OpenBLAS_PATCH_VERSION 16.dev)
|
||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||
|
||||
# Adhere to GNU filesystem layout conventions
|
||||
include(GNUInstallDirs)
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
enable_language(ASM)
|
||||
enable_language(C)
|
||||
|
||||
if(MSVC)
|
||||
set(OpenBLAS_LIBNAME libopenblas)
|
||||
else()
|
||||
set(OpenBLAS_LIBNAME openblas)
|
||||
endif()
|
||||
|
||||
#######
|
||||
if(MSVC)
|
||||
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
|
||||
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
|
||||
endif()
|
||||
option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS functions" OFF)
|
||||
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64, aarch64 or ppc only)" OFF)
|
||||
option(DYNAMIC_OLDER "Include specific support for older x86 cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF)
|
||||
option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)" OFF)
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
|
||||
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON)
|
||||
else()
|
||||
set(NO_AFFINITY 1)
|
||||
endif()
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoids conflicts with other BLAS libraries, especially when using
|
||||
# 64 bit integer interfaces in OpenBLAS.
|
||||
|
||||
set(SYMBOLPREFIX "" CACHE STRING "Add a prefix to all exported symbol names in the shared library to avoid conflicts with other BLAS libraries" )
|
||||
set(SYMBOLSUFFIX "" CACHE STRING "Add a suffix to all exported symbol names in the shared library, e.g. _64 for INTERFACE64 builds" )
|
||||
option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF)
|
||||
option(BUILD_DEBUG "Build Debug Version" OFF)
|
||||
#######
|
||||
if(BUILD_WITHOUT_LAPACK)
|
||||
set(NO_LAPACK 1)
|
||||
set(NO_LAPACKE 1)
|
||||
endif()
|
||||
|
||||
if(BUILD_DEBUG)
|
||||
set(CMAKE_BUILD_TYPE Debug)
|
||||
else()
|
||||
set(CMAKE_BUILD_TYPE Release)
|
||||
endif()
|
||||
|
||||
if(BUILD_WITHOUT_CBLAS)
|
||||
set(NO_CBLAS 1)
|
||||
endif()
|
||||
|
||||
#######
|
||||
|
||||
if(MSVC AND MSVC_STATIC_CRT)
|
||||
set(CompilerFlags
|
||||
CMAKE_CXX_FLAGS
|
||||
CMAKE_CXX_FLAGS_DEBUG
|
||||
CMAKE_CXX_FLAGS_RELEASE
|
||||
CMAKE_C_FLAGS
|
||||
CMAKE_C_FLAGS_DEBUG
|
||||
CMAKE_C_FLAGS_RELEASE
|
||||
)
|
||||
foreach(CompilerFlag ${CompilerFlags})
|
||||
string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
message(WARNING "CMake support is experimental. It does not yet support all build options and may not produce the same Makefiles that OpenBLAS ships with.")
|
||||
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
|
||||
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")
|
||||
|
||||
set(OpenBLAS_LIBNAME openblas${SUFFIX64_UNDERSCORE})
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake")
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/system.cmake")
|
||||
|
||||
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
|
||||
|
||||
@@ -74,15 +54,16 @@ if (NOT DYNAMIC_ARCH)
|
||||
list(APPEND BLASDIRS kernel)
|
||||
endif ()
|
||||
|
||||
if (DEFINED UTEST_CHECK)
|
||||
set(SANITY_CHECK 1)
|
||||
endif ()
|
||||
|
||||
if (DEFINED SANITY_CHECK)
|
||||
list(APPEND BLASDIRS reference)
|
||||
endif ()
|
||||
|
||||
set(SUBDIRS ${BLASDIRS})
|
||||
if (NOT NO_LAPACK)
|
||||
if(BUILD_RELAPACK)
|
||||
list(APPEND SUBDIRS relapack/src)
|
||||
endif()
|
||||
list(APPEND SUBDIRS lapack)
|
||||
endif ()
|
||||
|
||||
@@ -95,10 +76,6 @@ if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_
|
||||
set(BUILD_COMPLEX16 true)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED BUILD_MATGEN)
|
||||
set(BUILD_MATGEN true)
|
||||
endif()
|
||||
|
||||
set(FLOAT_TYPES "")
|
||||
if (BUILD_SINGLE)
|
||||
message(STATUS "Building Single Precision")
|
||||
@@ -120,17 +97,19 @@ if (BUILD_COMPLEX16)
|
||||
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
|
||||
endif ()
|
||||
|
||||
set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench)
|
||||
|
||||
# all :: libs netlib tests shared
|
||||
|
||||
# libs :
|
||||
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
|
||||
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
|
||||
endif ()
|
||||
|
||||
#Set default output directory
|
||||
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
if(MSVC)
|
||||
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib/Debug)
|
||||
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib/Release)
|
||||
if (${NO_STATIC} AND ${NO_SHARED})
|
||||
message(FATAL_ERROR "Neither static nor shared are enabled.")
|
||||
endif ()
|
||||
|
||||
# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
|
||||
set(TARGET_OBJS "")
|
||||
foreach (SUBDIR ${SUBDIRS})
|
||||
@@ -144,251 +123,68 @@ endforeach ()
|
||||
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
|
||||
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
|
||||
if (NOT NOFORTRAN AND NOT NO_LAPACK)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
|
||||
if (NOT NO_LAPACKE)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
|
||||
endif ()
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake")
|
||||
if (NOT NO_LAPACKE)
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Only generate .def for dll on MSVC and always produce pdb files for debug and release
|
||||
#Only generate .def for dll on MSVC
|
||||
if(MSVC)
|
||||
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
|
||||
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
|
||||
endif()
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
|
||||
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
|
||||
endif()
|
||||
|
||||
if (${DYNAMIC_ARCH})
|
||||
add_subdirectory(kernel)
|
||||
foreach(TARGET_CORE ${DYNAMIC_CORE})
|
||||
message("${TARGET_CORE}")
|
||||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:kernel_${TARGET_CORE}>")
|
||||
endforeach()
|
||||
endif ()
|
||||
|
||||
# add objects to the openblas lib
|
||||
add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${RELA_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
|
||||
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||
|
||||
# Android needs to explicitly link against libm
|
||||
if(ANDROID)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} m)
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
|
||||
|
||||
|
||||
if(NOT MSVC)
|
||||
#only build shared library for MSVC
|
||||
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
|
||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
|
||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
|
||||
|
||||
if(SMP)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} pthread)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
|
||||
endif()
|
||||
|
||||
# Handle MSVC exports
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
|
||||
else()
|
||||
# Creates verbose .def file (51KB vs 18KB)
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Set output for libopenblas
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES EXPORT_NAME "OpenBLAS")
|
||||
|
||||
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
|
||||
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
|
||||
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
endforeach()
|
||||
|
||||
#build test and ctest
|
||||
enable_testing()
|
||||
|
||||
if (USE_THREAD)
|
||||
# Add threading library to linker
|
||||
find_package(Threads)
|
||||
if (THREADS_HAVE_PTHREAD_ARG)
|
||||
set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY COMPILE_OPTIONS "-pthread")
|
||||
set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread")
|
||||
endif()
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT})
|
||||
add_subdirectory(test)
|
||||
if(NOT NO_CBLAS)
|
||||
add_subdirectory(ctest)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
#if (MSVC OR NOT NOFORTRAN)
|
||||
if (NOT NO_CBLAS)
|
||||
# Broken without fortran on unix
|
||||
add_subdirectory(utest)
|
||||
endif()
|
||||
|
||||
if (NOT MSVC AND NOT NOFORTRAN)
|
||||
# Build test and ctest
|
||||
add_subdirectory(test)
|
||||
if(NOT NO_CBLAS)
|
||||
add_subdirectory(ctest)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
|
||||
VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
|
||||
SOVERSION ${OpenBLAS_MAJOR_VERSION}
|
||||
)
|
||||
|
||||
if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
|
||||
if (NOT MSVC)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} "-Wl,-allow-multiple-definition")
|
||||
else()
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} "/FORCE:MULTIPLE")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFIX} STREQUAL "")
|
||||
if (NOT DEFINED ARCH)
|
||||
set(ARCH_IN "x86_64")
|
||||
else()
|
||||
set(ARCH_IN ${ARCH})
|
||||
endif()
|
||||
|
||||
if (${CORE} STREQUAL "generic")
|
||||
set(ARCH_IN "GENERIC")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED EXPRECISION)
|
||||
set(EXPRECISION_IN 0)
|
||||
else()
|
||||
set(EXPRECISION_IN ${EXPRECISION})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NO_CBLAS)
|
||||
set(NO_CBLAS_IN 0)
|
||||
else()
|
||||
set(NO_CBLAS_IN ${NO_CBLAS})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NO_LAPACK)
|
||||
set(NO_LAPACK_IN 0)
|
||||
else()
|
||||
set(NO_LAPACK_IN ${NO_LAPACK})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NO_LAPACKE)
|
||||
set(NO_LAPACKE_IN 0)
|
||||
else()
|
||||
set(NO_LAPACKE_IN ${NO_LAPACKE})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NEED2UNDERSCORES)
|
||||
set(NEED2UNDERSCORES_IN 0)
|
||||
else()
|
||||
set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED ONLY_CBLAS)
|
||||
set(ONLY_CBLAS_IN 0)
|
||||
else()
|
||||
set(ONLY_CBLAS_IN ${ONLY_CBLAS})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED BU)
|
||||
set(BU _)
|
||||
endif()
|
||||
|
||||
if (NOT ${SYMBOLPREFIX} STREQUAL "")
|
||||
message(STATUS "adding prefix ${SYMBOLPREFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
|
||||
endif()
|
||||
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
|
||||
message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
|
||||
endif()
|
||||
add_custom_command(TARGET ${OpenBLAS_LIBNAME} POST_BUILD
|
||||
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" > ${PROJECT_BINARY_DIR}/objcopy.def
|
||||
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
|
||||
COMMENT "renaming symbols"
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
# Install project
|
||||
|
||||
# Install libraries
|
||||
install(TARGETS ${OpenBLAS_LIBNAME}
|
||||
EXPORT "OpenBLAS${SUFFIX64}Targets"
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
|
||||
|
||||
# Install headers
|
||||
set(CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
|
||||
set(CMAKE_INSTALL_FULL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
message(STATUS "Generating openblas_config.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
|
||||
set(OPENBLAS_CONFIG_H ${CMAKE_BINARY_DIR}/openblas_config.h)
|
||||
file(WRITE ${OPENBLAS_CONFIG_H} "#ifndef OPENBLAS_CONFIG_H\n")
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_CONFIG_H\n")
|
||||
file(STRINGS ${PROJECT_BINARY_DIR}/config.h __lines)
|
||||
foreach(line ${__lines})
|
||||
string(REPLACE "#define " "" line ${line})
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_${line}\n")
|
||||
endforeach()
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_VERSION \"OpenBLAS ${OpenBLAS_VERSION}\"\n")
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS)
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "${OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS}\n")
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "#endif /* OPENBLAS_CONFIG_H */\n")
|
||||
install (FILES ${OPENBLAS_CONFIG_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
if(NOT NOFORTRAN)
|
||||
message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
|
||||
set(F77BLAS_H ${CMAKE_BINARY_DIR}/generated/f77blas.h)
|
||||
file(WRITE ${F77BLAS_H} "#ifndef OPENBLAS_F77BLAS_H\n")
|
||||
file(APPEND ${F77BLAS_H} "#define OPENBLAS_F77BLAS_H\n")
|
||||
file(APPEND ${F77BLAS_H} "#include \"openblas_config.h\"\n")
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h COMMON_INTERFACE_H_CONTENTS)
|
||||
file(APPEND ${F77BLAS_H} "${COMMON_INTERFACE_H_CONTENTS}\n")
|
||||
file(APPEND ${F77BLAS_H} "#endif")
|
||||
install (FILES ${F77BLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
endif()
|
||||
|
||||
if(NOT NO_CBLAS)
|
||||
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
|
||||
set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
|
||||
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
|
||||
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
endif()
|
||||
|
||||
if(NOT NO_LAPACKE)
|
||||
message (STATUS "Copying LAPACKE header files to ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
add_dependencies( ${OpenBLAS_LIBNAME} genlapacke)
|
||||
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/*.h")
|
||||
install (FILES ${INCLUDE_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
ADD_CUSTOM_TARGET(genlapacke
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
|
||||
)
|
||||
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
|
||||
endif()
|
||||
|
||||
include(FindPkgConfig QUIET)
|
||||
if(PKG_CONFIG_FOUND)
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
|
||||
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
|
||||
endif()
|
||||
|
||||
|
||||
# GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".
|
||||
set(PN OpenBLAS)
|
||||
set(CMAKECONFIG_INSTALL_DIR "share/cmake/${PN}${SUFFIX64}")
|
||||
configure_package_config_file(cmake/${PN}Config.cmake.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake"
|
||||
INSTALL_DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
|
||||
VERSION ${${PN}_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion)
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake
|
||||
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
|
||||
RENAME ${PN}${SUFFIX64}ConfigVersion.cmake
|
||||
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
install(EXPORT "${PN}${SUFFIX64}Targets"
|
||||
NAMESPACE "${PN}${SUFFIX64}::"
|
||||
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
# TODO: Why is the config saved here? Is this necessary with CMake?
|
||||
#Save the config files for installation
|
||||
# @cp Makefile.conf Makefile.conf_last
|
||||
# @cp config.h config_last.h
|
||||
#ifdef QUAD_PRECISION
|
||||
# @echo "#define QUAD_PRECISION">> config_last.h
|
||||
#endif
|
||||
#ifeq ($(EXPRECISION), 1)
|
||||
# @echo "#define EXPRECISION">> config_last.h
|
||||
#endif
|
||||
###
|
||||
#ifeq ($(DYNAMIC_ARCH), 1)
|
||||
# @$(MAKE) -C kernel commonlibs || exit 1
|
||||
# @for d in $(DYNAMIC_CORE) ; \
|
||||
# do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
# done
|
||||
# @echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
#endif
|
||||
#ifdef USE_THREAD
|
||||
# @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
|
||||
#endif
|
||||
# @touch lib.grd
|
||||
|
||||
|
||||
@@ -121,17 +121,6 @@ In chronological order:
|
||||
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
|
||||
ARMv8 support.
|
||||
|
||||
* Jerome Robert <jeromerobert@gmx.com>
|
||||
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478)
|
||||
* [2015-12-23] `stack_check` in `gemv.c` (bug #722)
|
||||
* [2015-12-28] Allow forcing the number of parallel make jobs
|
||||
* [2015-12-28] Fix detection of AMD E2-3200
|
||||
* [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what is expected
|
||||
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731)
|
||||
* [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742)
|
||||
* [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760)
|
||||
* [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727)
|
||||
|
||||
* Dan Kortschak
|
||||
* [2015-01-07] Added test for drotmg bug #484.
|
||||
|
||||
@@ -141,33 +130,5 @@ In chronological order:
|
||||
* Martin Koehler <https://github.com/grisuthedragon/>
|
||||
* [2015-09-07] Improved imatcopy
|
||||
|
||||
* Ashwin Sekhar T K <https://github.com/ashwinyes/>
|
||||
* [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8)
|
||||
* [2015-11-20] lapack-test fixes for Cortex-A57
|
||||
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57
|
||||
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57
|
||||
|
||||
* theoractice <https://github.com/theoractice/>
|
||||
* [2016-03-20] Fix compiler error in VisualStudio with CMake
|
||||
* [2016-03-22] Fix access violation on Windows while static linking
|
||||
|
||||
* Paul Mustière <https://github.com/buffer51/>
|
||||
* [2016-02-04] Fix Android build on ARMV7
|
||||
* [2016-04-26] Android build with LAPACK for ARMV7 & ARMV8
|
||||
|
||||
* Shivraj Patil <https://github.com/sva-img/>
|
||||
* [2016-05-03] DGEMM optimization for MIPS P5600 and I6400 using MSA
|
||||
|
||||
* Kaustubh Raste <https://github.com/ksraste/>
|
||||
* [2016-05-09] DTRSM optimization for MIPS P5600 and I6400 using MSA
|
||||
* [2016-05-20] STRSM optimization for MIPS P5600 and I6400 using MSA
|
||||
|
||||
* Abdelrauf <https://github.com/quickwritereader>
|
||||
* [2017-01-01] dgemm and dtrmm kernels for IBM z13
|
||||
* [2017-02-26] ztrmm kernel for IBM z13
|
||||
* [2017-03-13] strmm and ctrmm kernel for IBM z13
|
||||
* [2017-09-01] initial Blas Level-1,2 (double precision) for IBM z13
|
||||
* [2018-03-07] added missing Blas Level 1-2 (double precision) simd codes
|
||||
* [2019-02-01] added missing Blas Level-1,2 (single precision) simd codes
|
||||
* [2019-03-14] power9 dgemm/dtrmm kernel
|
||||
* [2019-04-29] power9 sgemm/strmm kernel
|
||||
* [Your name or handle] <[email or website]>
|
||||
* [Date] [Brief summary of your changes]
|
||||
|
||||
500
Changelog.txt
500
Changelog.txt
@@ -1,504 +1,4 @@
|
||||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.3.7
|
||||
11-Aug-2019
|
||||
|
||||
common:
|
||||
* having the gmake special variables TARGET_ARCH or TARGET_MACH
|
||||
defined no longer causes build failures in ctest or utest
|
||||
* defining NO_AFFINITY or USE_TLS to 0 in gmake builds no longer
|
||||
has the same effect as setting them to 1
|
||||
* a new test program was added to allow checking the library for
|
||||
thread safety
|
||||
* a new option USE_LOCKING was added to ensure thread safety when
|
||||
OpenBLAS itself is built without multithreading but will be
|
||||
called from multiple threads.
|
||||
* a build failure on Linux with glibc versions earlier than 2.5
|
||||
was fixed
|
||||
* a runtime error with CPU enumeration (and NO_AFFINITY not set)
|
||||
on glibc 2.6 was fixed
|
||||
* NO_AFFINITY was added to the CMAKE options (and defaults to being
|
||||
active on Linux, as in the gmake builds)
|
||||
|
||||
x86_64:
|
||||
* the build-time logic for detection of AVX512 availability in
|
||||
the processor and compiler was fixed
|
||||
* gmake builds on OSX now set the internal name of the library to
|
||||
libopenblas.0.dylib (consistent with CMAKE)
|
||||
* the Haswell DGEMM kernel received a significant speedup through
|
||||
improved prefetch and load instructions
|
||||
* performance of DGEMM, DTRMM, DTRSM and ZDOT on Zen/Zen2 was markedly
|
||||
increased by avoiding vpermpd instructions
|
||||
* the SKYLAKEX (AVX512) DGEMM helper functions have now been disabled
|
||||
to fix remaining errors in DGEMM, DSYMM and DTRMM
|
||||
|
||||
## POWER:
|
||||
* added support for building on FreeBSD/powerpc64 and FreeBSD/ppc970
|
||||
* added optimized kernels for POWER9 single and double precision complex BLAS3
|
||||
* added optimized kernels for POWER9 SGEMM and STRMM
|
||||
|
||||
## ARMV7:
|
||||
* fixed the softfp implementations of xAMAX and IxAMAX
|
||||
* removed the predefined -march= flags on both ARMV5 and ARMV6 as
|
||||
they were appropriate for only a subset of platforms
|
||||
|
||||
====================================================================
|
||||
Version 0.3.6
|
||||
29-Apr-2019
|
||||
|
||||
common:
|
||||
* the build tools now check that a given cpu TARGET is actually valid
|
||||
* the build-time check of system features (c_check) has been made
|
||||
less dependent on particular perl features (this should mainly
|
||||
benefit building on Windows)
|
||||
* several problems with the ReLAPACK integration were fixed,
|
||||
including INTERFACE64 support and building a shared library
|
||||
* building with CMAKE on BSD systems was improved
|
||||
* a non-absolute SUM function was added based on the
|
||||
existing optimized code for ASUM
|
||||
* CBLAS interfaces to the IxMIN and IxMAX functions were added
|
||||
* a name clash between LAPACKE and BOOST headers was resolved
|
||||
* CMAKE builds with OpenMP failed to include the appropriate getrf_parallel
|
||||
kernels
|
||||
* a crash on thread (key) deletion with the USE_TLS=1 memory management
|
||||
option was fixed
|
||||
* restored several earlier fixes, in particular for OpenMP performance,
|
||||
building on BSD, and calling fork on CYGWIN, which had inadvertently
|
||||
been dropped in the 0.3.3 rewrite of the memory management code.
|
||||
|
||||
x86_64:
|
||||
* the AVX512 DGEMM kernel has been disabled again due to unsolved problems
|
||||
* building with old versions of MSVC was fixed
|
||||
* it is now possible to build a static library on Windows with CMAKE
|
||||
* accessing environment variables on CYGWIN at run time was fixed
|
||||
* the CMAKE build system now recognizes 32bit userspace on 64bit hardware
|
||||
* Intel "Denverton" atom and Hygon "Dhyana" zen CPUs are now autodetected
|
||||
* building for DYNAMIC_ARCH with a DYNAMIC_LIST of targets is now supported
|
||||
with CMAKE as well
|
||||
* building for DYNAMIC_ARCH with GENERIC as the default target is now supported
|
||||
* a buffer overflow in the SSE GEMM kernel for Intel Nano targets was fixed
|
||||
* assembly bugs involving undeclared modification of input operands were fixed
|
||||
in the AXPY, DOT, GEMV, GER, SCAL, SYMV and TRSM microkernels for Nehalem,
|
||||
Sandybridge, Haswell, Bulldozer and Piledriver. These would typically cause
|
||||
test failures or segfaults when compiled with recent versions of gcc from 8 onward.
|
||||
* a similar bug was fixed in the blas_quickdivide code used to split workloads
|
||||
in most functions
|
||||
* a bug in the IxMIN implementation for the GENERIC target made it return the result of IxMAX
|
||||
* fixed building on SkylakeX systems when either the compiler or the (emulated) operating
|
||||
environment does not support AVX512
|
||||
* improved GEMM performance on ZEN targets
|
||||
|
||||
x86:
|
||||
* build failures caused by the recently added checks for AVX512 were fixed
|
||||
* an inline assembly bug involving undeclared modification of an input argument was
|
||||
fixed in the blas_quickdivide code used to split workloads in most functions
|
||||
* a bug in the IMIN implementation for the GENERIC target made it return the result of IMAX
|
||||
|
||||
MIPS32:
|
||||
* a bug in the IMIN implementation made it return the result of IMAX
|
||||
|
||||
POWER:
|
||||
* single precision BLAS1/2 functions have received optimized POWER8 kernels
|
||||
* POWER9 is now a separate target, with an optimized DGEMM/DTRMM kernel
|
||||
* building on PPC970 systems under OSX Leopard or Tiger is now supported
|
||||
* out-of-bounds memory accesses in the gemm_beta microkernels were fixed
|
||||
* building a shared library on AIX is now supported for POWER6
|
||||
* DYNAMIC_ARCH support has been added for POWER6 and newer
|
||||
|
||||
ARMv7:
|
||||
* corrected xDOT behaviour with zero INC_X or INC_Y
|
||||
* a bug in the IMIN implementation made it return the result of IMAX
|
||||
|
||||
ARMv8:
|
||||
* added support for HiSilicon TSV110 cpus
|
||||
* the CMAKE build system now recognizes 32bit userspace on 64bit hardware
|
||||
* cross-compilation with CMAKE now works again
|
||||
* a bug in the IMIN implementation made it return the result of IMAX
|
||||
* ARMV8 builds with the BINARY=32 option are now automatically handled as ARMV7
|
||||
|
||||
IBM Z:
|
||||
* optimized microkernels for single precision BLAS1/2 functions have been added
|
||||
for both Z13 and Z14
|
||||
|
||||
====================================================================
|
||||
Version 0.3.5
|
||||
31-Dec-2018
|
||||
|
||||
common:
|
||||
* loop unrolling in TRMV has been enabled again.
|
||||
* A domain error in the thread workload distribution for SYRK
|
||||
has been fixed.
|
||||
* gmake builds will now automatically add -fPIC to the build
|
||||
options if the platform requires it.
|
||||
* a pthreads key leakage (and associated crash on dlclose) in
|
||||
the USE_TLS codepath was fixed.
|
||||
* building of the utest cases on systems that do not provide
|
||||
an implementation of complex.h was fixed.
|
||||
|
||||
x86_64:
|
||||
* the SkylakeX code was changed to compile on OSX.
|
||||
* unwanted application of the -march=skylake-avx512 option
|
||||
to the common code parts of a DYNAMIC_ARCH build was fixed.
|
||||
* improved performance of SGEMM for small workloads on Skylake X.
|
||||
* performance of SGEMM and DGEMM was improved on Haswell.
|
||||
|
||||
ARMV8:
|
||||
* a configuration error that broke the CNRM2 kernel was corrected.
|
||||
* compilation of the GEMM kernels with CMAKE was fixed.
|
||||
* DYNAMIC_ARCH builds are now available with CMAKE as well.
|
||||
* using CMAKE for cross-compilation to the new cpu TARGETs
|
||||
introduced in 0.3.4 now works.
|
||||
|
||||
POWER:
|
||||
* a problem in cpu autodetection for AIX has been corrected.
|
||||
|
||||
====================================================================
|
||||
Version 0.3.4
|
||||
02-Dec-2018
|
||||
|
||||
common:
|
||||
* the new, experimental thread-local memory allocation had
|
||||
inadvertently been left enabled for gmake builds in 0.3.3
|
||||
despite the announcement. It is now disabled by default, and
|
||||
single-threaded builds will keep using the old allocator even
|
||||
if the USE_TLS option is turned on.
|
||||
* OpenBLAS will now provide enough buffer space for at least 50
|
||||
threads by default.
|
||||
* The output of openblas_get_config() now contains the version
|
||||
number.
|
||||
* A serious thread safety bug in GEMV operation with small M and
|
||||
large N size has been fixed.
|
||||
* The code will now automatically call blas_thread_init after a
|
||||
fork if needed before handling a call to openblas_set_num_threads
|
||||
* Accesses to parallelized level3 functions from multiple callers
|
||||
are now serialized to avoid thread races (unless using OpenMP).
|
||||
This should provide better performance than the known-threadsafe
|
||||
(but non-default) USE_SIMPLE_THREADED_LEVEL3 option.
|
||||
* When building LAPACK with gfortran, -frecursive is now (again)
|
||||
enabled by default to ensure correct behaviour.
|
||||
* The OpenBLAS version cblas.h now supports both CBLAS_ORDER and
|
||||
CBLAS_LAYOUT as the name of the matrix row/column order option.
|
||||
* Externally set LDFLAGS are now passed through to the final compile/link
|
||||
steps to facilitate setting platform-specific linker flags.
|
||||
* A potential race condition during the build of LAPACK (that would
|
||||
usually manifest itself as a failure to build TESTING/MATGEN) has been
|
||||
fixed.
|
||||
* xHEMV has been changed to stay single-threaded for small input sizes
|
||||
where the overhead of multithreading exceeds any possible gains
|
||||
* CSWAP and ZSWAP have been limited to a single thread except on ARMV8 or
|
||||
ThunderX hardware with sizable input.
|
||||
* Linker flags for the PGI compiler have been updated
|
||||
* Behaviour of AXPY with zero increments is now handled in the C interface,
|
||||
correcting the result on at least Intel Atom.
|
||||
* The result matrix from calling SGELSS with an all-zero input matrix is
|
||||
now zeroed completely.
|
||||
|
||||
x86_64:
|
||||
* Autodetection of AMD Ryzen2 has been fixed (again).
|
||||
* CMAKE builds now support labeling of an INTERFACE64=1 build of
|
||||
the library with the _64 suffix.
|
||||
* AVX512 version of DGEMM has been added and the AVX512 SGEMM kernel
|
||||
has been sped up by rewriting with C intrinsics
|
||||
* Fixed compilation on RHEL5/CENTOS5 (issue with typename __WAIT_STATUS)
|
||||
|
||||
POWER:
|
||||
* added support for building on AIX (with gcc and GNU tools from AIX Toolbox).
|
||||
* CPU type detection has been implemented for AIX.
|
||||
* CPU type detection has been fixed for NETBSD.
|
||||
|
||||
MIPS64:
|
||||
* AXPY on LOONGSON3A has been corrected to pass "zero increment" utest.
|
||||
* DSDOT on LOONGSON3A has been fixed.
|
||||
* the SGEMM microkernel has been hardened against potential data loss.
|
||||
|
||||
ARMV8:
|
||||
* DYNAMIC_ARCH support is now available for 64bit ARM
|
||||
* cross-compiling for ARMV8 under iOS now works.
|
||||
* cpu-specific code has been rearranged to make better use of both
|
||||
hardware commonalities and model-specific compiler optimizations.
|
||||
* XGENE1 has been removed as a TARGET, superseded by the improved generic
|
||||
ARMV8 support.
|
||||
|
||||
ARMV7:
|
||||
* Older assembly mnemonics have been converted to UAL form to allow
|
||||
building with clang 7.0
|
||||
* Cross compiling LAPACKE for Android has been fixed again (broken by
|
||||
update to LAPACK 3.7.0 some while ago).
|
||||
|
||||
====================================================================
|
||||
Version 0.3.3
|
||||
31-Aug-2018
|
||||
|
||||
common:
|
||||
* thread memory allocation has been switched back to the method
|
||||
used before version 0.3.1 due to unexpected problems caused by
|
||||
the new code under some circumstances. A new compile-time option
|
||||
USE_TLS has been added to enable the new code, and it is hoped
|
||||
that this can become the default again in the next version.
|
||||
* LAPACK PR272 has been integrated, which fixes spurious errors
|
||||
in DSYEVR and related functions caused by missing conversion
|
||||
from ILAENV to ILAENV_2STAGE in several _2stage routines.
|
||||
* the cmake-generated OpenBLASConfig.cmake now uses correct case
|
||||
for the name of the library
|
||||
* added support for Haiku OS
|
||||
|
||||
x86_64:
|
||||
* added AVX512 implementations of SDOT, DDOT, SAXPY, DAXPY,
|
||||
DSCAL, DGEMVN and DSYMVL
|
||||
* added a workaround for a cygwin issue that prevented compilation
|
||||
of AVX512 code
|
||||
|
||||
IBM Z:
|
||||
* added autodetection of Z14
|
||||
* fixed TRMM errors in the generic target
|
||||
|
||||
====================================================================
|
||||
Version 0.3.2
|
||||
30-Jul-2018
|
||||
|
||||
common:
|
||||
* fixes for regressions caused by the rewrite of the thread
|
||||
initialization code in 0.3.1
|
||||
|
||||
POWER:
|
||||
* fixed cpu autodetection for the BSDs
|
||||
|
||||
MIPS64:
|
||||
* fixed utest errors in AXPY, DSDOT, ROT and SWAP
|
||||
|
||||
x86_64:
|
||||
* added autodetection of AMD Ryzen 2
|
||||
* fixed build with older versions of MSVC
|
||||
|
||||
====================================================================
|
||||
Version 0.3.1
|
||||
01-Jul-2018
|
||||
|
||||
common:
|
||||
* rewritten thread initialization code with significantly reduced overhead
|
||||
* added CBLAS interfaces to the IxAMIN BLAS extension functions
|
||||
* fixed the lapack-test target
|
||||
* CMAKE builds now create an OpenBLASConfig.cmake file
|
||||
* ZAXPY now uses a single thread for small input sizes
|
||||
* the LAPACK code was updated from Reference-LAPACK/lapack#253
|
||||
(fixing LAPACKE interfaces to Aasen's functions)
|
||||
|
||||
POWER:
|
||||
* corrected CROT and ZROT behaviour with zero INC_X
|
||||
|
||||
ARMV7:
|
||||
* corrected xDOT behaviour with zero INC_X or INC_Y
|
||||
|
||||
x86_64:
|
||||
* retired some older targets of DYNAMIC_ARCH builds to a new option DYNAMIC_OLDER,
|
||||
this affects PENRYN,DUNNINGTON,OPTERON,OPTERON_SSE3,BOBCAT,ATOM and NANO
|
||||
(which will still be supported via the slower PRESCOTT kernels when this option is not set)
|
||||
* added an option DYNAMIC_LIST that (used in conjunction with DYNAMIC_ARCH) allows to
|
||||
specify the list of x86_64 targets to include. Any target not on the list will be supported
|
||||
by the Sandybridge or Nehalem kernels if available, or by Prescott.
|
||||
* improved SWITCH_RATIO on Haswell for increased GEMM throughput
|
||||
* added initial support for Intel Skylake X, including an AVX512 SGEMM kernel
|
||||
* added autodetection of Intel Cannon Lake series as Skylake X
|
||||
* added a default L2 cache size for hypervisors that return zero here (Chromebook)
|
||||
* fixed a name clash with recent Windows10 headers that broke the build with (at least)
|
||||
recent mingw from MSYS2
|
||||
* fixed a link error in mixed clang/gfortran builds with OpenMP
|
||||
* updated the OSX deployment target to 10.8
|
||||
* switched on parallel make for builds on MS Windows by default
|
||||
|
||||
x86:
|
||||
* fixed SSWAP and DSWAP behaviour with zero INC_X and INC_Y
|
||||
|
||||
====================================================================
|
||||
Version 0.3.0
|
||||
23-May-2018
|
||||
|
||||
common:
|
||||
* fixed some more thread race and locking bugs
|
||||
* added preliminary support for calling an OpenMP build of the library from multiple threads
|
||||
* removed performance impact of thread locks added in 0.2.20 on OpenMP code
|
||||
* general code cleanup
|
||||
* optimized DSDOT implementation
|
||||
* improved thread distribution for GEMM
|
||||
* corrected IMATCOPY/OMATCOPY implementation
|
||||
* fixed out-of-bounds accesses in the multithreaded xBMV/xPMV and SYMV implementations
|
||||
* cmake build improvements
|
||||
* pkgconfig file now contains build options
|
||||
* openblas_get_config() now reports USE_OPENMP and NUM_THREADS settings used for the build
|
||||
* corrections and improvements for systems with more than 64 cpus
|
||||
* LAPACK code updated to 3.8.0 including later fixes
|
||||
* added ReLAPACK, a recursive implementation of several LAPACK functions
|
||||
* Rewrote ROTMG to handle cases that the netlib code failed to address
|
||||
* Disabled (broken) multithreading code for xTRMV
|
||||
* corrected prototypes of complex CBLAS functions to make our cblas.h match the generally accepted standard
|
||||
* shared memory access failures on startup are now handled more gracefully
|
||||
* restored utests from earlier releases (and made them pass on all affected systems)
|
||||
|
||||
SPARC:
|
||||
* several fixes for cpu autodetection
|
||||
|
||||
POWER:
|
||||
* corrected vector register overwriting in several Power8 kernels
|
||||
* optimized additional BLAS functions
|
||||
|
||||
ARM:
|
||||
* added support for CortexA53 and A72
|
||||
* added autodetection for ThunderX2T99
|
||||
* made most optimized kernels the default for generic ARMv8 targets
|
||||
|
||||
x86_64:
|
||||
* parallelized DDOT kernel for Haswell
|
||||
* changed alignment directives in assembly kernels to boost performance on OSX
|
||||
* fixed register handling in the GEMV microkernels (bug exposed by gcc7)
|
||||
* added support for building on OpenBSD and Dragonfly
|
||||
* updated compiler options to work with Intel release 2018
|
||||
* support fully optimized build with clang/flang on Microsoft Windows
|
||||
* fixed building on AIX
|
||||
|
||||
IBM Z:
|
||||
* added optimized BLAS 1/2 functions
|
||||
|
||||
MIPS:
|
||||
* fixed cpu autodetection helper code
|
||||
* added mips32 1004K cpu (Mediatek MT7621 and similar SoC)
|
||||
* added mips64 I6500 cpu
|
||||
|
||||
====================================================================
|
||||
Version 0.2.20
|
||||
24-Jul-2017
|
||||
|
||||
common:
|
||||
* Improved CMake support
|
||||
* Fixed several thread race and locking bugs
|
||||
* Fixed default LAPACK optimization level
|
||||
* Updated LAPACK to 3.7.0
|
||||
* Added ReLAPACK (https://github.com/HPAC/ReLAPACK, make BUILD_RELAPACK=1)
|
||||
|
||||
POWER:
|
||||
* Optimizations for Power9
|
||||
* Fixed several Power8 assembly bugs
|
||||
|
||||
ARM:
|
||||
* New optimized Vulcan and ThunderX2T99 targets
|
||||
* Support for ARMV7 SOFT_FP ABI (make ARM_SOFTFP_ABI=1)
|
||||
* Detect all cpu cores including offline ones
|
||||
* Fix compilation with CLANG
|
||||
* Support building a shared library for Android
|
||||
|
||||
MIPS:
|
||||
* Fixed several threading issues
|
||||
* Fix compilation with CLANG
|
||||
|
||||
x86_64:
|
||||
* Detect Intel Bay Trail and Apollo Lake
|
||||
* Detect Intel Sky Lake and Kaby Lake
|
||||
* Detect Intel Knights Landing
|
||||
* Detect AMD A8, A10, A12 and Ryzen
|
||||
* Support 64bit builds with Visual Studio
|
||||
* Fix building with Intel and PGI compilers
|
||||
* Fix building with MINGW and TDM-GCC
|
||||
* Fix cmake builds for Haswell and related cpus
|
||||
* Fix building for Sandybridge with CLANG 3.9
|
||||
* Add support for the FLANG compiler
|
||||
|
||||
IBM Z:
|
||||
* New target z13 with BLAS3 optimizations
|
||||
|
||||
====================================================================
|
||||
Version 0.2.19
|
||||
1-Sep-2016
|
||||
common:
|
||||
* Improved cross compiling.
|
||||
* Fix the bug on musl libc.
|
||||
|
||||
POWER:
|
||||
* Optimize BLAS on Power8
|
||||
* Fixed Julia+OpenBLAS bugs on Power8
|
||||
|
||||
MIPS:
|
||||
* Optimize BLAS on MIPS P5600 and I6400 (Thanks, Shivraj Patil, Kaustubh Raste)
|
||||
|
||||
ARM:
|
||||
* Improved on ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
|
||||
|
||||
|
||||
====================================================================
|
||||
Version 0.2.18
|
||||
12-Apr-2016
|
||||
common:
|
||||
* If you set MAKE_NB_JOBS flag less than or equal to zero,
|
||||
make will be without -j.
|
||||
|
||||
x86/x86_64:
|
||||
* Support building Visual Studio static library. (#813, Thanks, theoractice)
|
||||
* Fix bugs to pass buildbot CI tests (http://build.openblas.net)
|
||||
|
||||
ARM:
|
||||
* Provide DGEMM 8x4 kernel for Cortex-A57 (Thanks, Ashwin Sekhar T K)
|
||||
|
||||
POWER:
|
||||
* Optimize S and C BLAS3 on Power8
|
||||
* Optimize BLAS2/1 on Power8
|
||||
|
||||
====================================================================
|
||||
Version 0.2.17
|
||||
20-Mar-2016
|
||||
common:
|
||||
* Enable BUILD_LAPACK_DEPRECATED=1 by default.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.16
|
||||
15-Mar-2016
|
||||
common:
|
||||
* Avoid potential getenv segfault. (#716)
|
||||
* Import LAPACK svn bugfix #142-#147,#150-#155
|
||||
|
||||
x86/x86_64:
|
||||
* Optimize c/zgemv for AMD Bulldozer, Piledriver, Steamroller
|
||||
* Fix bug with scipy linalg test.
|
||||
|
||||
ARM:
|
||||
* Improve DGEMM for ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
|
||||
|
||||
POWER:
|
||||
* Optimize D and Z BLAS3 functions for Power8.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.16.rc1
|
||||
23-Feb-2016
|
||||
common:
|
||||
* Upgrade LAPACK to 3.6.0 version.
|
||||
Add BUILD_LAPACK_DEPRECATED option in Makefile.rule to build
|
||||
LAPACK deprecated functions.
|
||||
* Add MAKE_NB_JOBS option in Makefile.
|
||||
Force number of make jobs. This is particularly
|
||||
useful when using distcc. (#735. Thanks, Jerome Robert.)
|
||||
* Redesign unit test. Run unit/regression test at every build (Travis-CI and Appveyor).
|
||||
* Disable multi-threading for small size swap and ger. (#744. Thanks, Jerome Robert)
|
||||
* Improve small zger, zgemv, ztrmv using stack allocation (#727. Thanks, Jerome Robert)
|
||||
* Let openblas_get_num_threads return the number of active threads.
|
||||
(#760. Thanks, Jerome Robert)
|
||||
* Support illumos(OmniOS). (#749. Thanks, Lauri Tirkkonen)
|
||||
* Fix LAPACK Dormbr, Dormlq bug. (#711, #713. Thanks, Brendan Tracey)
|
||||
* Update scipy benchmark script. (#745. Thanks, John Kirkham)
|
||||
|
||||
x86/x86_64:
|
||||
* Optimize trsm kernels for AMD Bulldozer, Piledriver, Steamroller.
|
||||
* Detect Intel Avoton.
|
||||
* Detect AMD Trinity, Richland, E2-3200.
|
||||
* Fix gemv performance bug on Mac OSX Intel Haswell.
|
||||
* Fix some bugs with CMake and Visual Studio
|
||||
|
||||
ARM:
|
||||
* Support and optimize Cortex-A57 AArch64.
|
||||
(#686. Thanks, Ashwin Sekhar TK)
|
||||
* Fix Android build on ARMV7 (#778. Thanks, Paul Mustiere)
|
||||
* Update ARMV6 kernels.
|
||||
|
||||
POWER:
|
||||
* Fix detection of POWER architecture
|
||||
(#684. Thanks, Sebastien Villemot)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.15
|
||||
27-Oct-2015
|
||||
|
||||
107
Makefile
107
Makefile
@@ -7,6 +7,10 @@ ifneq ($(DYNAMIC_ARCH), 1)
|
||||
BLASDIRS += kernel
|
||||
endif
|
||||
|
||||
ifdef UTEST_CHECK
|
||||
SANITY_CHECK = 1
|
||||
endif
|
||||
|
||||
ifdef SANITY_CHECK
|
||||
BLASDIRS += reference
|
||||
endif
|
||||
@@ -16,30 +20,14 @@ ifneq ($(NO_LAPACK), 1)
|
||||
SUBDIRS += lapack
|
||||
endif
|
||||
|
||||
RELA =
|
||||
ifeq ($(BUILD_RELAPACK), 1)
|
||||
RELA = re_lapack
|
||||
endif
|
||||
|
||||
ifeq ($(NO_FORTRAN), 1)
|
||||
define NOFORTRAN
|
||||
1
|
||||
endef
|
||||
define NO_LAPACK
|
||||
1
|
||||
endef
|
||||
export NOFORTRAN
|
||||
export NO_LAPACK
|
||||
endif
|
||||
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
|
||||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
|
||||
.PHONY : all libs netlib $(RELA) test ctest shared install
|
||||
.NOTPARALLEL : all libs $(RELA) prof lapack-test install blas-test
|
||||
.PHONY : all libs netlib test ctest shared install
|
||||
.NOTPARALLEL : all libs prof lapack-test install blas-test
|
||||
|
||||
all :: libs netlib $(RELA) tests shared
|
||||
all :: libs netlib tests shared
|
||||
@echo
|
||||
@echo " OpenBLAS build complete. ($(LIB_COMPONENTS))"
|
||||
@echo
|
||||
@@ -58,7 +46,7 @@ endif
|
||||
endif
|
||||
|
||||
@echo " C compiler ... $(C_COMPILER) (command line : $(CC))"
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
@echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))"
|
||||
endif
|
||||
ifneq ($(OSNAME), AIX)
|
||||
@@ -96,20 +84,23 @@ endif
|
||||
@echo
|
||||
|
||||
shared :
|
||||
ifneq ($(NO_SHARED), 1)
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
|
||||
ifndef NO_SHARED
|
||||
ifeq ($(OSNAME), Linux)
|
||||
@$(MAKE) -C exports so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
@$(MAKE) -C exports so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
@$(MAKE) -C exports so
|
||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@$(MAKE) -C exports dyn
|
||||
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
||||
@-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@$(MAKE) -C exports dll
|
||||
@@ -120,22 +111,25 @@ endif
|
||||
endif
|
||||
|
||||
tests :
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
ifndef TARGET
|
||||
ifndef CROSS
|
||||
touch $(LIBNAME)
|
||||
ifndef NO_FBLAS
|
||||
$(MAKE) -C test all
|
||||
endif
|
||||
ifdef UTEST_CHECK
|
||||
$(MAKE) -C utest all
|
||||
endif
|
||||
endif
|
||||
ifndef NO_CBLAS
|
||||
$(MAKE) -C ctest all
|
||||
ifeq ($(CPP_THREAD_SAFETY_TEST), 1)
|
||||
$(MAKE) -C cpp_thread_test all
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
libs :
|
||||
ifeq ($(CORE), UNKNOWN)
|
||||
ifeq ($(CORE), UNKOWN)
|
||||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
@@ -168,9 +162,6 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
||||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
done
|
||||
@echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
ifeq ($(DYNAMIC_OLDER), 1)
|
||||
@echo DYNAMIC_OLDER=1 >> Makefile.conf_last
|
||||
endif
|
||||
endif
|
||||
ifdef USE_THREAD
|
||||
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
|
||||
@@ -225,7 +216,7 @@ netlib :
|
||||
|
||||
else
|
||||
netlib : lapack_prebuild
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
endif
|
||||
@@ -234,19 +225,11 @@ ifndef NO_LAPACKE
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
re_lapack :
|
||||
|
||||
else
|
||||
re_lapack :
|
||||
@$(MAKE) -C relapack
|
||||
endif
|
||||
|
||||
prof_lapack : lapack_prebuild
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof
|
||||
|
||||
lapack_prebuild :
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@@ -255,8 +238,8 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "override ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@@ -271,8 +254,6 @@ ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifdef SMP
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else ifeq ($(OSNAME), Haiku)
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
@@ -283,41 +264,36 @@ else
|
||||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
|
||||
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
|
||||
large.tgz :
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
if [ ! -a $< ]; then
|
||||
-wget http://www.netlib.org/lapack/timing/large.tgz;
|
||||
fi
|
||||
endif
|
||||
|
||||
timing.tgz :
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
if [ ! -a $< ]; then
|
||||
-wget http://www.netlib.org/lapack/timing/timing.tgz;
|
||||
fi
|
||||
endif
|
||||
|
||||
lapack-timing : large.tgz timing.tgz
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
|
||||
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||
make -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||
endif
|
||||
|
||||
|
||||
lapack-test :
|
||||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
ifneq ($(CROSS), 1)
|
||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \
|
||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||
endif
|
||||
@@ -329,9 +305,9 @@ lapack-runtest:
|
||||
|
||||
|
||||
blas-test:
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out)
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out)
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
|
||||
make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)
|
||||
|
||||
|
||||
dummy :
|
||||
@@ -357,7 +333,6 @@ endif
|
||||
@touch $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h
|
||||
@$(MAKE) -C relapack clean
|
||||
@rm -f *.grd Makefile.conf_last config_last.h
|
||||
@(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt)
|
||||
@echo Done.
|
||||
|
||||
33
Makefile.arm
33
Makefile.arm
@@ -1,14 +1,33 @@
|
||||
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
|
||||
# ifeq logical or
|
||||
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
|
||||
ifeq ($(OSNAME), Android)
|
||||
CCOMMON_OPT += -mfpu=neon
|
||||
FCOMMON_OPT += -mfpu=neon
|
||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
else
|
||||
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
||||
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV7)
|
||||
ifeq ($(OSNAME), Android)
|
||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
else
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV6)
|
||||
CCOMMON_OPT += -mfpu=vfp
|
||||
FCOMMON_OPT += -mfpu=vfp
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(CORE), ARMV5)
|
||||
CCOMMON_OPT += -marm -march=armv5
|
||||
FCOMMON_OPT += -marm -march=armv5
|
||||
endif
|
||||
|
||||
|
||||
|
||||
@@ -4,42 +4,8 @@ CCOMMON_OPT += -march=armv8-a
|
||||
FCOMMON_OPT += -march=armv8-a
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA53)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA57)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
|
||||
CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
|
||||
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA72)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA73)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), FALKOR)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=falkor
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=falkor
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX2T99)
|
||||
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), TSV110)
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
|
||||
endif
|
||||
|
||||
160
Makefile.install
160
Makefile.install
@@ -12,7 +12,6 @@ OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
|
||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
||||
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
|
||||
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
|
||||
|
||||
.PHONY : install
|
||||
.NOTPARALLEL : install
|
||||
@@ -21,149 +20,110 @@ lib.grd :
|
||||
$(error OpenBLAS: Please run "make" firstly)
|
||||
|
||||
install : lib.grd
|
||||
@-mkdir -p "$(DESTDIR)$(PREFIX)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)"
|
||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
||||
@-mkdir -p $(DESTDIR)$(PREFIX)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
#for inc
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@echo \#define OPENBLAS_CONFIG_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@cat openblas_config_template.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
|
||||
@echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@echo \#ifndef OPENBLAS_F77BLAS_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@echo \#define OPENBLAS_F77BLAS_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@echo \#include \"openblas_config.h\" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@cat common_interface.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@echo \#endif >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
|
||||
@echo \#ifndef OPENBLAS_F77BLAS_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#define OPENBLAS_F77BLAS_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#include \"openblas_config.h\" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@cat common_interface.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
@echo \#endif >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h
|
||||
|
||||
ifndef NO_CBLAS
|
||||
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
|
||||
@sed 's/common/openblas_config/g' cblas.h > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h
|
||||
endif
|
||||
|
||||
ifneq ($(OSNAME), AIX)
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
ifneq ($(NO_STATIC),1)
|
||||
ifndef NO_STATIC
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@install -pm644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
endif
|
||||
#for install shared library
|
||||
ifneq ($(NO_SHARED),1)
|
||||
ifndef NO_SHARED
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
|
||||
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ifeq ($(OSNAME), Linux)
|
||||
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
|
||||
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@-install_name_tool -id "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib ; \
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
||||
@-cp $(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-install_name_tool -id $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)
|
||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||
@-cp $(IMPLIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@-cp $(LIBDLLNAME) $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
||||
@-cp $(LIBDLLNAME).a $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||
@-cp $(IMPLIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
endif
|
||||
endif
|
||||
|
||||
else
|
||||
#install on AIX has different options syntax
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
ifneq ($(NO_STATIC),1)
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
endif
|
||||
#for install shared library
|
||||
ifneq ($(NO_SHARED),1)
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
#Generating openblas.pc
|
||||
@echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
||||
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'extralib='$(EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
|
||||
|
||||
#Generating OpenBLASConfig.cmake
|
||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
|
||||
ifneq ($(NO_SHARED),1)
|
||||
ifndef NO_SHARED
|
||||
#ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
else
|
||||
#only static
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
#Generating OpenBLASConfigVersion.cmake
|
||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo "else ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo "else ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo " endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo "endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)
|
||||
@echo Install OK!
|
||||
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
ifdef BINARY64
|
||||
else
|
||||
endif
|
||||
@@ -1,38 +1,4 @@
|
||||
|
||||
ifdef USE_THREAD
|
||||
ifeq ($(USE_THREAD), 0)
|
||||
USE_OPENMP = 0
|
||||
else
|
||||
USE_OPENMP = 1
|
||||
endif
|
||||
else
|
||||
USE_OPENMP = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER9)
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
else
|
||||
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER8)
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
else
|
||||
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
|
||||
endif
|
||||
endif
|
||||
|
||||
# workaround for C->FORTRAN ABI violation in LAPACKE
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
FCOMMON_OPT += -fno-optimize-sibling-calls
|
||||
endif
|
||||
# CCOMMON_OPT += -DALLOC_SHM
|
||||
|
||||
FLAMEPATH = $(HOME)/flame/lib
|
||||
|
||||
@@ -50,16 +16,6 @@ else
|
||||
endif
|
||||
endif
|
||||
|
||||
#Either uncomment below line or run make with `USE_MASS=1` to enable support of MASS library
|
||||
#USE_MASS = 1
|
||||
|
||||
ifeq ($(USE_MASS), 1)
|
||||
# Path to MASS libs, change it if the libs are installed at any other location
|
||||
MASSPATH = /opt/ibm/xlmass/8.1.5/lib
|
||||
COMMON_OPT += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations -DUSE_MASS
|
||||
EXTRALIB += -L$(MASSPATH) -lmass -lmassvp8 -lmass_simdp8
|
||||
endif
|
||||
|
||||
ifdef BINARY64
|
||||
|
||||
|
||||
|
||||
@@ -17,34 +17,14 @@ ifdef CPUIDEMU
|
||||
EXFLAGS = -DCPUIDEMU -DVENDOR=99
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), 1004K)
|
||||
TARGET_FLAGS = -mips32r2
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), P5600)
|
||||
TARGET_FLAGS = -mips32r5
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), I6400)
|
||||
TARGET_FLAGS = -mips64r6
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), P6600)
|
||||
TARGET_FLAGS = -mips64r6
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), I6500)
|
||||
TARGET_FLAGS = -mips64r6
|
||||
endif
|
||||
|
||||
all: getarch_2nd
|
||||
./getarch_2nd 0 >> $(TARGET_MAKE)
|
||||
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||
|
||||
config.h : c_check f_check getarch
|
||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS)
|
||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC)
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS)
|
||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC)
|
||||
else
|
||||
#When we only build CBLAS, we set NOFORTRAN=2
|
||||
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
||||
|
||||
155
Makefile.rule
155
Makefile.rule
@@ -3,7 +3,7 @@
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.3.7
|
||||
VERSION = 0.2.16.dev
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
@@ -17,11 +17,6 @@ VERSION = 0.3.7
|
||||
# If you want to support multiple architecture in one binary
|
||||
# DYNAMIC_ARCH = 1
|
||||
|
||||
# If you want the full list of x86_64 architectures supported in DYNAMIC_ARCH
|
||||
# mode (including individual optimizied codes for PENRYN, DUNNINGTON, OPTERON,
|
||||
# OPTERON_SSE3, ATOM and NANO rather than fallbacks to older architectures)
|
||||
# DYNAMIC_OLDER = 1
|
||||
|
||||
# C compiler including binary type(32bit / 64bit). Default is gcc.
|
||||
# Don't use Intel Compiler or PGI, it won't generate right codes as I expect.
|
||||
# CC = gcc
|
||||
@@ -48,8 +43,6 @@ VERSION = 0.3.7
|
||||
# HOSTCC = gcc
|
||||
|
||||
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
|
||||
# Please note that AVX is not available on 32-bit.
|
||||
# Setting BINARY=32 disables AVX/AVX2/AVX-512.
|
||||
# BINARY=64
|
||||
|
||||
# About threaded BLAS. It will be automatically detected if you don't
|
||||
@@ -58,83 +51,40 @@ VERSION = 0.3.7
|
||||
# For force setting for multi threaded, specify USE_THREAD = 1
|
||||
# USE_THREAD = 0
|
||||
|
||||
# If you want to build a single-threaded OpenBLAS, but expect to call this
|
||||
# from several concurrent threads in some other program, comment this in for
|
||||
# thread safety. (This is done automatically for USE_THREAD=1 , and should not
|
||||
# be necessary when USE_OPENMP=1)
|
||||
# USE_LOCKING = 1
|
||||
|
||||
# If you're going to use this library with OpenMP, please comment it in.
|
||||
# This flag is always set for POWER8. Don't set USE_OPENMP = 0 if you're targeting POWER8.
|
||||
# USE_OPENMP = 1
|
||||
|
||||
# The OpenMP scheduler to use - by default this is "static" and you
|
||||
# will normally not want to change this unless you know that your main
|
||||
# workload will involve tasks that have highly unbalanced running times
|
||||
# for individual threads. Changing away from "static" may also adversely
|
||||
# affect memory access locality in NUMA systems. Setting to "runtime" will
|
||||
# allow you to select the scheduler from the environment variable OMP_SCHEDULE
|
||||
# CCOMMON_OPT += -DOMP_SCHED=dynamic
|
||||
|
||||
# You can define the maximum number of threads. Basically it should be less
|
||||
# than or equal to the number of CPU threads. If you don't specify one, it's
|
||||
# automatically detected by the build system.
|
||||
# If SMT (aka. HT) is enabled on the system, it may or may not be beneficial to
|
||||
# restrict NUM_THREADS to the number of physical cores. By default, the automatic
|
||||
# detection includes logical CPUs, thus allowing the use of SMT.
|
||||
# Users may opt at runtime to use less than NUM_THREADS threads.
|
||||
#
|
||||
# Note for package maintainers: you can build OpenBLAS with a large NUM_THREADS
|
||||
# value (eg. 32-256) if you expect your users to use that many threads. Due to the way
|
||||
# some internal structures are allocated, using a large NUM_THREADS value has a RAM
|
||||
# footprint penalty, even if users reduce the actual number of threads at runtime.
|
||||
# You can define maximum number of threads. Basically it should be
|
||||
# less than actual number of cores. If you don't specify one, it's
|
||||
# automatically detected by the the script.
|
||||
# NUM_THREADS = 24
|
||||
|
||||
# If you have enabled USE_OPENMP and your application would call
|
||||
# OpenBLAS's calculation API from multiple threads, please comment this in.
|
||||
# This flag defines how many instances of OpenBLAS's calculation API can actually
|
||||
# run in parallel. If more than NUM_PARALLEL threads call OpenBLAS's calculation API,
|
||||
# they need to wait for the preceding API calls to finish or risk data corruption.
|
||||
# NUM_PARALLEL = 2
|
||||
|
||||
# If you don't need to install the static library, please comment this in.
|
||||
# if you don't need to install the static library, please comment it in.
|
||||
# NO_STATIC = 1
|
||||
|
||||
# If you don't need to generate the shared library, please comment this in.
|
||||
# if you don't need generate the shared library, please comment it in.
|
||||
# NO_SHARED = 1
|
||||
|
||||
# If you don't need the CBLAS interface, please comment this in.
|
||||
# If you don't need CBLAS interface, please comment it in.
|
||||
# NO_CBLAS = 1
|
||||
|
||||
# If you only want the CBLAS interface without installing a Fortran compiler,
|
||||
# please comment this in.
|
||||
# If you only want CBLAS interface without installing Fortran compiler,
|
||||
# please comment it in.
|
||||
# ONLY_CBLAS = 1
|
||||
|
||||
# If you don't need LAPACK, please comment this in.
|
||||
# If you set NO_LAPACK=1, the build system automatically sets NO_LAPACKE=1.
|
||||
# If you don't need LAPACK, please comment it in.
|
||||
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1.
|
||||
# NO_LAPACK = 1
|
||||
|
||||
# If you don't need LAPACKE (C Interface to LAPACK), please comment this in.
|
||||
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
|
||||
# NO_LAPACKE = 1
|
||||
|
||||
# Build LAPACK Deprecated functions since LAPACK 3.6.0
|
||||
BUILD_LAPACK_DEPRECATED = 1
|
||||
|
||||
# Build RecursiveLAPACK on top of LAPACK
|
||||
# BUILD_RELAPACK = 1
|
||||
|
||||
# If you want to use the legacy threaded Level 3 implementation.
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
|
||||
# If you want to use the new, still somewhat experimental code that uses
|
||||
# thread-local storage instead of a central memory buffer in memory.c
|
||||
# Note that if your system uses GLIBC, it needs to have at least glibc 2.21
|
||||
# for this to work.
|
||||
# USE_TLS = 1
|
||||
# If you want to use legacy threaded Level 3 implementation.
|
||||
USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
|
||||
# If you want to drive whole 64bit region by BLAS. Not all Fortran
|
||||
# compilers support this. It's safe to keep this commented out if you
|
||||
# are not sure. (This is equivalent to the "-i8" ifort option).
|
||||
# compiler supports this. It's safe to keep comment it out if you
|
||||
# are not sure(equivalent to "-i8" option).
|
||||
# INTERFACE64 = 1
|
||||
|
||||
# Unfortunately most of kernel won't give us high quality buffer.
|
||||
@@ -142,18 +92,10 @@ BUILD_LAPACK_DEPRECATED = 1
|
||||
# but it will consume time. If you don't like it, you can disable one.
|
||||
NO_WARMUP = 1
|
||||
|
||||
# Comment this in if you want to disable OpenBLAS's CPU/Memory affinity handling.
|
||||
# This feature is only implemented on Linux, and is always disabled on other platforms.
|
||||
# Enabling affinity handling may improve performance, especially on NUMA systems, but
|
||||
# it may conflict with certain applications that also try to manage affinity.
|
||||
# This conflict can result in threads of the application calling OpenBLAS ending up locked
|
||||
# to the same core(s) as OpenBLAS, possibly binding all threads to a single core.
|
||||
# For this reason, affinity handling is disabled by default. Can be safely enabled if nothing
|
||||
# else modifies affinity settings.
|
||||
# Note: enabling affinity has been known to cause problems with NumPy and R
|
||||
# If you want to disable CPU/Memory affinity on Linux.
|
||||
NO_AFFINITY = 1
|
||||
|
||||
# If you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
|
||||
# if you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
|
||||
# BIGNUMA = 1
|
||||
|
||||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
||||
@@ -163,27 +105,13 @@ NO_AFFINITY = 1
|
||||
# Don't use Haswell optimizations if binutils is too old (e.g. RHEL6)
|
||||
# NO_AVX2 = 1
|
||||
|
||||
# Don't use SkylakeX optimizations if binutils or compiler are too old (the build
|
||||
# system will try to determine this automatically)
|
||||
# NO_AVX512 = 1
|
||||
|
||||
# Don't use parallel make.
|
||||
# NO_PARALLEL_MAKE = 1
|
||||
|
||||
# Force number of make jobs. The default is the number of logical CPU of the host.
|
||||
# This is particularly useful when using distcc.
|
||||
# A negative value will disable adding a -j flag to make, allowing to use a parent
|
||||
# make -j value. This is useful to call OpenBLAS make from an other project
|
||||
# makefile
|
||||
# MAKE_NB_JOBS = 2
|
||||
|
||||
# If you would like to know minute performance report of GotoBLAS.
|
||||
# FUNCTION_PROFILE = 1
|
||||
|
||||
# Support for IEEE quad precision(it's *real* REAL*16)( under testing)
|
||||
# This option should not be used - it is a holdover from unfinished code present
|
||||
# in the original GotoBLAS2 library that may be usable as a starting point but
|
||||
# is not even expected to compile in its present form.
|
||||
# QUAD_PRECISION = 1
|
||||
|
||||
# Theads are still working for a while after finishing BLAS operation
|
||||
@@ -191,39 +119,38 @@ NO_AFFINITY = 1
|
||||
# time out to improve performance. This number should be from 4 to 30
|
||||
# which corresponds to (1 << n) cycles. For example, if you set to 26,
|
||||
# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz
|
||||
# system). Also you can control this number by THREAD_TIMEOUT
|
||||
# system). Also you can control this number by THREAD_TIMEOUT
|
||||
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26
|
||||
|
||||
# Using special device driver for mapping physically contiguous memory
|
||||
# Using special device driver for mapping physically contiguous memory
|
||||
# to the user space. If bigphysarea is enabled, it will use it.
|
||||
# DEVICEDRIVER_ALLOCATION = 1
|
||||
|
||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
||||
# CONSISTENT_FPCSR = 1
|
||||
|
||||
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
|
||||
# with single thread. (Actually in recent versions this is a factor proportional to the
|
||||
# number of floating point operations necessary for the given problem size, no longer
|
||||
# an individual dimension). You can use this setting to avoid the overhead of multi-
|
||||
# threading in small matrix sizes. The default value is 4, but values as high as 50 have
|
||||
# been reported to be optimal for certain workloads (50 is the recommended value for Julia).
|
||||
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
|
||||
# with single thread. You can use this flag to avoid the overhead of multi-threading
|
||||
# in small matrix sizes. The default value is 4.
|
||||
# GEMM_MULTITHREAD_THRESHOLD = 4
|
||||
|
||||
# If you need sanity check by comparing results to reference BLAS. It'll be very
|
||||
# If you need sanity check by comparing reference BLAS. It'll be very
|
||||
# slow (Not implemented yet).
|
||||
# SANITY_CHECK = 1
|
||||
|
||||
# Run testcases in utest/ . When you enable UTEST_CHECK, it would enable
|
||||
# SANITY_CHECK to compare the result with reference BLAS.
|
||||
# UTEST_CHECK = 1
|
||||
|
||||
# The installation directory.
|
||||
# PREFIX = /opt/OpenBLAS
|
||||
|
||||
# Common Optimization Flag;
|
||||
# The default -O2 is enough.
|
||||
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT
|
||||
# COMMON_OPT = -O2
|
||||
|
||||
# gfortran option for LAPACK to improve thread-safety
|
||||
# It is enabled by default in Makefile.system for gfortran
|
||||
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT
|
||||
# gfortran option for LAPACK
|
||||
# enable this flag only on 64bit Linux and if you need a thread safe lapack library
|
||||
# FCOMMON_OPT = -frecursive
|
||||
|
||||
# Profiling flags
|
||||
@@ -232,11 +159,10 @@ COMMON_PROF = -pg
|
||||
# Build Debug version
|
||||
# DEBUG = 1
|
||||
|
||||
# Set maximum stack allocation.
|
||||
# The default value is 2048. 0 disables stack allocation and may reduce GER and GEMV
|
||||
# performance. For details, https://github.com/xianyi/OpenBLAS/pull/482
|
||||
# Improve GEMV and GER for small matrices by stack allocation.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/482
|
||||
#
|
||||
# MAX_STACK_ALLOC = 0
|
||||
MAX_STACK_ALLOC=2048
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoid conflicts with other BLAS libraries, especially when using
|
||||
@@ -249,21 +175,6 @@ COMMON_PROF = -pg
|
||||
# SYMBOLPREFIX=
|
||||
# SYMBOLSUFFIX=
|
||||
|
||||
# Run a C++ based thread safety tester after the build is done.
|
||||
# This is mostly intended as a developer feature to spot regressions, but users and
|
||||
# package maintainers can enable this if they have doubts about the thread safety of
|
||||
# the library, given the configuration in this file.
|
||||
# By default, the thread safety tester launches 52 concurrent calculations at the same
|
||||
# time.
|
||||
#
|
||||
# Please note that the test uses ~1300 MiB of RAM for the DGEMM test.
|
||||
#
|
||||
# The test requires CBLAS to be built, a C++11 capable compiler and the presence of
|
||||
# an OpenMP implementation. If you are cross-compiling this test will probably not
|
||||
# work at all.
|
||||
#
|
||||
# CPP_THREAD_SAFETY_TEST = 1
|
||||
|
||||
#
|
||||
# End of user configuration
|
||||
#
|
||||
|
||||
325
Makefile.system
325
Makefile.system
@@ -9,22 +9,6 @@ ifndef TOPDIR
|
||||
TOPDIR = .
|
||||
endif
|
||||
|
||||
# If ARCH is not set, we use the host system's architecture.
|
||||
ifndef ARCH
|
||||
ARCH := $(shell uname -m)
|
||||
endif
|
||||
|
||||
# Catch conflicting usage of ARCH in some BSD environments
|
||||
ifeq ($(ARCH), amd64)
|
||||
override ARCH=x86_64
|
||||
else ifeq ($(ARCH), powerpc64)
|
||||
override ARCH=power
|
||||
else ifeq ($(ARCH), i386)
|
||||
override ARCH=x86
|
||||
else ifeq ($(ARCH), aarch64)
|
||||
override ARCH=arm64
|
||||
endif
|
||||
|
||||
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
|
||||
|
||||
# Default C compiler
|
||||
@@ -33,24 +17,15 @@ NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
|
||||
# http://stackoverflow.com/questions/4029274/mingw-and-make-variables
|
||||
# - Default value is 'cc' which is not always a valid command (e.g. MinGW).
|
||||
ifeq ($(origin CC),default)
|
||||
|
||||
# Check if $(CC) refers to a valid command and set the value to gcc if not
|
||||
ifneq ($(findstring cmd.exe,$(SHELL)),)
|
||||
ifeq ($(shell where $(CC) 2>NUL),)
|
||||
CC = gcc
|
||||
# Change the default compile to clang on Mac OSX.
|
||||
# http://stackoverflow.com/questions/714100/os-detecting-makefile
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CC = clang
|
||||
# EXTRALIB += -Wl,-no_compact_unwind
|
||||
endif
|
||||
endif
|
||||
else # POSIX-ish
|
||||
ifeq ($(shell command -v $(CC) 2>/dev/null),)
|
||||
ifeq ($(shell uname -s),Darwin)
|
||||
CC = clang
|
||||
# EXTRALIB += -Wl,-no_compact_unwind
|
||||
else
|
||||
CC = gcc
|
||||
endif # Darwin
|
||||
endif # CC exists
|
||||
endif # Shell is sane
|
||||
|
||||
endif # CC is set to default
|
||||
|
||||
# Default Fortran compiler (FC) is selected by f_check.
|
||||
|
||||
@@ -70,7 +45,6 @@ endif
|
||||
|
||||
ifdef TARGET
|
||||
GETARCH_FLAGS := -DFORCE_$(TARGET)
|
||||
GETARCH_FLAGS += -DUSER_TARGET
|
||||
endif
|
||||
|
||||
# Force fallbacks for 32bit
|
||||
@@ -79,9 +53,6 @@ ifeq ($(BINARY), 32)
|
||||
ifeq ($(TARGET), HASWELL)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), SKYLAKEX)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
@@ -97,12 +68,6 @@ endif
|
||||
ifeq ($(TARGET), EXCAVATOR)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), ZEN)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), ARMV8)
|
||||
GETARCH_FLAGS := -DFORCE_ARMV7
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
@@ -118,9 +83,6 @@ ifeq ($(BINARY), 32)
|
||||
ifeq ($(TARGET_CORE), HASWELL)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), SKYLAKEX)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
@@ -136,18 +98,10 @@ endif
|
||||
ifeq ($(TARGET_CORE), EXCAVATOR)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), ZEN)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
# On x86_64 build getarch with march=native. This is required to detect AVX512 support in getarch.
|
||||
ifeq ($(ARCH), x86_64)
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
GETARCH_FLAGS += -march=native
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
@@ -165,18 +119,13 @@ GETARCH_FLAGS += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(BINARY), 32)
|
||||
GETARCH_FLAGS += -DNO_AVX -DNO_AVX2 -DNO_AVX512
|
||||
NO_AVX512 = 1
|
||||
GETARCH_FLAGS += -DNO_AVX
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX2), 1)
|
||||
GETARCH_FLAGS += -DNO_AVX2
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX512), 1)
|
||||
GETARCH_FLAGS += -DNO_AVX512
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
GETARCH_FLAGS += -g
|
||||
endif
|
||||
@@ -190,10 +139,6 @@ NO_PARALLEL_MAKE=0
|
||||
endif
|
||||
GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE)
|
||||
|
||||
ifdef MAKE_NB_JOBS
|
||||
GETARCH_FLAGS += -DMAKE_NB_JOBS=$(MAKE_NB_JOBS)
|
||||
endif
|
||||
|
||||
ifeq ($(HOSTCC), loongcc)
|
||||
GETARCH_FLAGS += -static
|
||||
endif
|
||||
@@ -210,7 +155,7 @@ ifndef GOTOBLAS_MAKEFILE
|
||||
export GOTOBLAS_MAKEFILE = 1
|
||||
|
||||
# Generating Makefile.conf and config.h
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
|
||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all)
|
||||
|
||||
ifndef TARGET_CORE
|
||||
include $(TOPDIR)/Makefile.conf
|
||||
@@ -220,17 +165,12 @@ endif
|
||||
|
||||
endif
|
||||
|
||||
ifndef NUM_PARALLEL
|
||||
NUM_PARALLEL = 1
|
||||
endif
|
||||
|
||||
ifndef NUM_THREADS
|
||||
NUM_THREADS = $(NUM_CORES)
|
||||
endif
|
||||
|
||||
ifeq ($(NUM_THREADS), 1)
|
||||
override USE_THREAD = 0
|
||||
override USE_OPENMP = 0
|
||||
endif
|
||||
|
||||
ifdef USE_THREAD
|
||||
@@ -247,10 +187,6 @@ SMP = 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(SMP), 1)
|
||||
USE_LOCKING =
|
||||
endif
|
||||
|
||||
ifndef NEED_PIC
|
||||
NEED_PIC = 1
|
||||
endif
|
||||
@@ -267,10 +203,9 @@ OBJCOPY = $(CROSS_SUFFIX)objcopy
|
||||
OBJCONV = $(CROSS_SUFFIX)objconv
|
||||
|
||||
|
||||
# When fortran support was either not detected or actively deselected, only build BLAS.
|
||||
# If Fortran detection failed, only build BLAS.
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
NO_LAPACK = 1
|
||||
override FEXTRALIB =
|
||||
endif
|
||||
|
||||
#
|
||||
@@ -278,13 +213,11 @@ endif
|
||||
#
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
ifndef MACOSX_DEPLOYMENT_TARGET
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.8
|
||||
endif
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.6
|
||||
MD5SUM = md5 -r
|
||||
endif
|
||||
|
||||
ifneq (,$(findstring $(OSNAME), FreeBSD OpenBSD DragonFly))
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
MD5SUM = md5 -r
|
||||
endif
|
||||
|
||||
@@ -297,10 +230,6 @@ EXTRALIB += -lm
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Android)
|
||||
EXTRALIB += -lm
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
EXTRALIB += -lm
|
||||
endif
|
||||
@@ -358,20 +287,17 @@ endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
NEED_PIC = 0
|
||||
NO_EXPRECISION = 1
|
||||
OS_CYGWIN_NT = 1
|
||||
endif
|
||||
|
||||
ifneq ($(OSNAME), WINNT)
|
||||
ifneq ($(OSNAME), CYGWIN_NT)
|
||||
ifneq ($(OSNAME), Interix)
|
||||
ifneq ($(OSNAME), Android)
|
||||
ifdef SMP
|
||||
EXTRALIB += -lpthread
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
|
||||
@@ -398,17 +324,10 @@ ifdef SANITY_CHECK
|
||||
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
|
||||
endif
|
||||
|
||||
MAX_STACK_ALLOC ?= 2048
|
||||
ifneq ($(MAX_STACK_ALLOC), 0)
|
||||
ifdef MAX_STACK_ALLOC
|
||||
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
|
||||
endif
|
||||
|
||||
ifdef USE_LOCKING
|
||||
ifneq ($(USE_LOCKING), 0)
|
||||
CCOMMON_OPT += -DUSE_LOCKING
|
||||
endif
|
||||
endif
|
||||
|
||||
#
|
||||
# Architecture dependent settings
|
||||
#
|
||||
@@ -455,7 +374,7 @@ FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
EXPRECISION = 1
|
||||
CCOMMON_OPT += -DEXPRECISION
|
||||
CCOMMON_OPT += -DEXPRECISION
|
||||
FCOMMON_OPT += -m128bit-long-double
|
||||
endif
|
||||
endif
|
||||
@@ -469,7 +388,7 @@ endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
|
||||
#check
|
||||
#check
|
||||
ifeq ($(USE_THREAD), 0)
|
||||
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
|
||||
endif
|
||||
@@ -480,11 +399,12 @@ CCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
$(error OpenBLAS: Clang didn't support OpenMP yet.)
|
||||
CCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), INTEL)
|
||||
CCOMMON_OPT += -fopenmp
|
||||
CCOMMON_OPT += -openmp
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), PGI)
|
||||
@@ -509,55 +429,17 @@ DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
DYNAMIC_CORE = PRESCOTT CORE2
|
||||
ifeq ($(DYNAMIC_OLDER), 1)
|
||||
DYNAMIC_CORE += PENRYN DUNNINGTON
|
||||
endif
|
||||
DYNAMIC_CORE += NEHALEM
|
||||
ifeq ($(DYNAMIC_OLDER), 1)
|
||||
DYNAMIC_CORE += OPTERON OPTERON_SSE3
|
||||
endif
|
||||
DYNAMIC_CORE += BARCELONA
|
||||
ifeq ($(DYNAMIC_OLDER), 1)
|
||||
DYNAMIC_CORE += BOBCAT ATOM NANO
|
||||
endif
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
|
||||
endif
|
||||
ifneq ($(NO_AVX2), 1)
|
||||
DYNAMIC_CORE += HASWELL ZEN
|
||||
endif
|
||||
ifneq ($(NO_AVX512), 1)
|
||||
ifneq ($(NO_AVX2), 1)
|
||||
DYNAMIC_CORE += SKYLAKEX
|
||||
endif
|
||||
DYNAMIC_CORE += HASWELL
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef DYNAMIC_LIST
|
||||
override DYNAMIC_CORE = PRESCOTT $(DYNAMIC_LIST)
|
||||
XCCOMMON_OPT = -DDYNAMIC_LIST -DDYN_PRESCOTT
|
||||
XCCOMMON_OPT += $(foreach dcore,$(DYNAMIC_LIST),-DDYN_$(dcore))
|
||||
CCOMMON_OPT += $(XCCOMMON_OPT)
|
||||
#CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)'
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm64)
|
||||
DYNAMIC_CORE = ARMV8
|
||||
DYNAMIC_CORE += CORTEXA57
|
||||
DYNAMIC_CORE += THUNDERX
|
||||
DYNAMIC_CORE += THUNDERX2T99
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), power)
|
||||
DYNAMIC_CORE = POWER6
|
||||
DYNAMIC_CORE += POWER8
|
||||
DYNAMIC_CORE += POWER9
|
||||
endif
|
||||
|
||||
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
|
||||
ifndef DYNAMIC_CORE
|
||||
override DYNAMIC_ARCH=
|
||||
DYNAMIC_ARCH =
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -573,7 +455,7 @@ endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
|
||||
ifeq ($(ARCH), mips64)
|
||||
NO_BINARY_MODE = 1
|
||||
endif
|
||||
|
||||
@@ -585,23 +467,6 @@ endif
|
||||
ifeq ($(ARCH), arm)
|
||||
NO_BINARY_MODE = 1
|
||||
BINARY_DEFINED = 1
|
||||
|
||||
CCOMMON_OPT += -marm
|
||||
FCOMMON_OPT += -marm
|
||||
|
||||
# If softfp abi is mentioned on the command line, force it.
|
||||
ifeq ($(ARM_SOFTFP_ABI), 1)
|
||||
CCOMMON_OPT += -mfloat-abi=softfp
|
||||
FCOMMON_OPT += -mfloat-abi=softfp
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Android)
|
||||
ifeq ($(ARM_SOFTFP_ABI), 1)
|
||||
EXTRALIB += -lm
|
||||
else
|
||||
EXTRALIB += -Wl,-lm_hard
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm64)
|
||||
@@ -630,16 +495,13 @@ endif
|
||||
|
||||
ifdef NO_BINARY_MODE
|
||||
|
||||
ifeq ($(ARCH), $(filter $(ARCH),mips64))
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifdef BINARY64
|
||||
CCOMMON_OPT += -mabi=64
|
||||
else
|
||||
CCOMMON_OPT += -mabi=n32
|
||||
endif
|
||||
BINARY_DEFINED = 1
|
||||
else ifeq ($(ARCH), $(filter $(ARCH),mips))
|
||||
CCOMMON_OPT += -mabi=32
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), LOONGSON3A)
|
||||
@@ -652,31 +514,6 @@ CCOMMON_OPT += -march=mips64
|
||||
FCOMMON_OPT += -march=mips64
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), 1004K)
|
||||
CCOMMON_OPT += -mips32r2 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips32r2 $(MSA_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), P5600)
|
||||
CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), I6400)
|
||||
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), P6600)
|
||||
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), I6500)
|
||||
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
@@ -684,14 +521,12 @@ endif
|
||||
endif
|
||||
|
||||
ifndef BINARY_DEFINED
|
||||
ifneq ($(OSNAME), AIX)
|
||||
ifdef BINARY64
|
||||
CCOMMON_OPT += -m64
|
||||
else
|
||||
CCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
@@ -715,28 +550,10 @@ endif
|
||||
# Fortran Compiler dependent settings
|
||||
#
|
||||
|
||||
ifeq ($(F_COMPILER), FLANG)
|
||||
CCOMMON_OPT += -DF_INTERFACE_FLANG
|
||||
ifdef BINARY64
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
FCOMMON_OPT += -Wall
|
||||
else
|
||||
FCOMMON_OPT += -Wall
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), G77)
|
||||
CCOMMON_OPT += -DF_INTERFACE_G77
|
||||
FCOMMON_OPT += -Wall
|
||||
ifndef NO_BINARY_MODE
|
||||
ifneq ($(OSNAME), AIX)
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -m64
|
||||
else
|
||||
@@ -744,12 +561,10 @@ FCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), G95)
|
||||
CCOMMON_OPT += -DF_INTERFACE_G95
|
||||
FCOMMON_OPT += -Wall
|
||||
ifneq ($(OSNAME), AIX)
|
||||
ifndef NO_BINARY_MODE
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -m64
|
||||
@@ -758,45 +573,34 @@ FCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
CCOMMON_OPT += -DF_INTERFACE_GFORT
|
||||
FCOMMON_OPT += -Wall
|
||||
# make single-threaded LAPACK calls thread-safe #1847
|
||||
FCOMMON_OPT += -frecursive
|
||||
# work around ABI problem with passing single-character arguments
|
||||
FCOMMON_OPT += -fno-optimize-sibling-calls
|
||||
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
EXTRALIB += -lgfortran
|
||||
endif
|
||||
ifdef NO_BINARY_MODE
|
||||
ifeq ($(ARCH), $(filter $(ARCH),mips64))
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -mabi=64
|
||||
else
|
||||
FCOMMON_OPT += -mabi=n32
|
||||
endif
|
||||
else ifeq ($(ARCH), $(filter $(ARCH),mips))
|
||||
FCOMMON_OPT += -mabi=32
|
||||
endif
|
||||
else
|
||||
ifdef BINARY64
|
||||
ifneq ($(OSNAME), AIX)
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -fdefault-integer-8
|
||||
endif
|
||||
endif
|
||||
else
|
||||
ifneq ($(OSNAME), AIX)
|
||||
FCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
@@ -810,7 +614,7 @@ FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -fopenmp
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -866,7 +670,21 @@ FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifneq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
FCOMMON_OPT += -m32
|
||||
else
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
else
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -mabi=64
|
||||
else
|
||||
FCOMMON_OPT += -mabi=n32
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
@@ -882,7 +700,7 @@ endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
FCOMMON_OPT += -n32
|
||||
else
|
||||
@@ -912,7 +730,7 @@ endif
|
||||
|
||||
ifeq ($(C_COMPILER), OPEN64)
|
||||
|
||||
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
|
||||
ifeq ($(ARCH), mips64)
|
||||
ifndef BINARY64
|
||||
CCOMMON_OPT += -n32
|
||||
else
|
||||
@@ -990,10 +808,6 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
||||
CCOMMON_OPT += -DDYNAMIC_ARCH
|
||||
endif
|
||||
|
||||
ifeq ($(DYNAMIC_OLDER), 1)
|
||||
CCOMMON_OPT += -DDYNAMIC_OLDER
|
||||
endif
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
CCOMMON_OPT += -DNO_LAPACK
|
||||
#Disable LAPACK C interface
|
||||
@@ -1016,10 +830,6 @@ ifeq ($(NO_AVX2), 1)
|
||||
CCOMMON_OPT += -DNO_AVX2
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX512), 1)
|
||||
CCOMMON_OPT += -DNO_AVX512
|
||||
endif
|
||||
|
||||
ifdef SMP
|
||||
CCOMMON_OPT += -DSMP_SERVER
|
||||
|
||||
@@ -1066,18 +876,10 @@ endif
|
||||
|
||||
CCOMMON_OPT += -DMAX_CPU_NUMBER=$(NUM_THREADS)
|
||||
|
||||
CCOMMON_OPT += -DMAX_PARALLEL_NUMBER=$(NUM_PARALLEL)
|
||||
|
||||
ifdef USE_SIMPLE_THREADED_LEVEL3
|
||||
CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3
|
||||
endif
|
||||
|
||||
ifeq ($(USE_TLS), 1)
|
||||
CCOMMON_OPT += -DUSE_TLS
|
||||
endif
|
||||
|
||||
CCOMMON_OPT += -DVERSION=\"$(VERSION)\"
|
||||
|
||||
ifndef SYMBOLPREFIX
|
||||
SYMBOLPREFIX =
|
||||
endif
|
||||
@@ -1087,15 +889,9 @@ SYMBOLSUFFIX =
|
||||
endif
|
||||
|
||||
ifndef LIBNAMESUFFIX
|
||||
LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
|
||||
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
|
||||
else
|
||||
LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
LIBPREFIX = cyg$(LIBNAMEBASE)
|
||||
else
|
||||
LIBPREFIX = lib$(LIBNAMEBASE)
|
||||
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
|
||||
endif
|
||||
|
||||
KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
|
||||
@@ -1125,12 +921,8 @@ endif
|
||||
endif
|
||||
|
||||
ifdef NO_AFFINITY
|
||||
ifeq ($(NO_AFFINITY), 0)
|
||||
override undefine NO_AFFINITY
|
||||
else
|
||||
CCOMMON_OPT += -DNO_AFFINITY
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef FUNCTION_PROFILE
|
||||
CCOMMON_OPT += -DFUNCTION_PROFILE
|
||||
@@ -1160,18 +952,17 @@ ifeq ($(OSNAME), SunOS)
|
||||
TAR = gtar
|
||||
PATCH = gpatch
|
||||
GREP = ggrep
|
||||
AWK = nawk
|
||||
else
|
||||
TAR = tar
|
||||
PATCH = patch
|
||||
GREP = grep
|
||||
AWK = awk
|
||||
endif
|
||||
|
||||
ifndef MD5SUM
|
||||
MD5SUM = md5sum
|
||||
endif
|
||||
|
||||
AWK = awk
|
||||
|
||||
REVISION = -r$(VERSION)
|
||||
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))
|
||||
@@ -1189,22 +980,22 @@ COMMON_OPT = -O2
|
||||
endif
|
||||
|
||||
ifndef FCOMMON_OPT
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
FCOMMON_OPT = -O0
|
||||
else
|
||||
FCOMMON_OPT = -O2 -frecursive
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
|
||||
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||
|
||||
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
|
||||
override FFLAGS += $(FCOMMON_OPT)
|
||||
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
#MAKEOVERRIDES =
|
||||
|
||||
ifdef NEED_PIC
|
||||
ifeq (,$(findstring PIC,$(FFLAGS)))
|
||||
override FFLAGS += -fPIC
|
||||
endif
|
||||
endif
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
#Disable -fopenmp for LAPACK Fortran codes on Windows.
|
||||
ifdef OS_WINDOWS
|
||||
@@ -1262,12 +1053,7 @@ endif
|
||||
|
||||
|
||||
LIBDLLNAME = $(LIBPREFIX).dll
|
||||
IMPLIBNAME = lib$(LIBNAMEBASE).dll.a
|
||||
ifneq ($(OSNAME), AIX)
|
||||
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
|
||||
else
|
||||
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.a)
|
||||
endif
|
||||
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
|
||||
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
|
||||
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)
|
||||
@@ -1287,9 +1073,6 @@ LIB_COMPONENTS += LAPACK
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
LIB_COMPONENTS += LAPACKE
|
||||
endif
|
||||
ifeq ($(BUILD_RELAPACK), 1)
|
||||
LIB_COMPONENTS += ReLAPACK
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ONLY_CBLAS), 1)
|
||||
@@ -1339,12 +1122,9 @@ export HAVE_VFP
|
||||
export HAVE_VFPV3
|
||||
export HAVE_VFPV4
|
||||
export HAVE_NEON
|
||||
export HAVE_MSA
|
||||
export MSA_FLAGS
|
||||
export KERNELDIR
|
||||
export FUNCTION_PROFILE
|
||||
export TARGET_CORE
|
||||
export NO_AVX512
|
||||
|
||||
export SGEMM_UNROLL_M
|
||||
export SGEMM_UNROLL_N
|
||||
@@ -1403,3 +1183,4 @@ SUNPATH = /opt/sunstudio12.1
|
||||
else
|
||||
SUNPATH = /opt/SUNWspro
|
||||
endif
|
||||
|
||||
|
||||
@@ -8,38 +8,6 @@ endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), SKYLAKEX)
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifndef NO_AVX512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), HASWELL)
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifndef NO_AVX2
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -mavx2
|
||||
endif
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
FCOMMON_OPT += -mavx2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
ARFLAGS = -m x64
|
||||
endif
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
|
||||
ifeq ($(CORE), Z13)
|
||||
CCOMMON_OPT += -march=z13 -mzvector
|
||||
FCOMMON_OPT += -march=z13 -mzvector
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), Z14)
|
||||
CCOMMON_OPT += -march=z14 -mzvector
|
||||
FCOMMON_OPT += -march=z14 -mzvector
|
||||
endif
|
||||
223
README.md
223
README.md
@@ -2,227 +2,140 @@
|
||||
|
||||
[](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
|
||||
Travis CI: [](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
Travis CI: [](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
|
||||
AppVeyor: [](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
|
||||
|
||||
[](https://dev.azure.com/xianyi/OpenBLAS/_build/latest?definitionId=1&branchName=develop)
|
||||
|
||||
## Introduction
|
||||
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
Please read the documentation on the OpenBLAS wiki pages: <https://github.com/xianyi/OpenBLAS/wiki>.
|
||||
Please read the documents on OpenBLAS wiki pages <http://github.com/xianyi/OpenBLAS/wiki>.
|
||||
|
||||
## Binary Packages
|
||||
|
||||
We provide official binary packages for the following platform:
|
||||
We provide binary packages for the following platform.
|
||||
|
||||
* Windows x86/x86_64
|
||||
|
||||
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/).
|
||||
|
||||
## Installation from Source
|
||||
Download from project homepage. http://xianyi.github.com/OpenBLAS/
|
||||
|
||||
Download from project homepage, https://xianyi.github.com/OpenBLAS/, or check out the code
|
||||
using Git from https://github.com/xianyi/OpenBLAS.git.
|
||||
|
||||
### Dependencies
|
||||
|
||||
Building OpenBLAS requires the following to be installed:
|
||||
|
||||
* GNU Make
|
||||
* A C compiler, e.g. GCC or Clang
|
||||
* A Fortran compiler (optional, for LAPACK)
|
||||
* IBM MASS (optional, see below)
|
||||
|
||||
Or, check out codes from git://github.com/xianyi/OpenBLAS.git
|
||||
### Normal compile
|
||||
|
||||
Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically.
|
||||
To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`.
|
||||
The full target list is in the file `TargetList.txt`.
|
||||
* type "make" to detect the CPU automatically.
|
||||
or
|
||||
* type "make TARGET=xxx" to set target CPU, e.g. "make TARGET=NEHALEM". The full target list is in file TargetList.txt.
|
||||
|
||||
### Cross compile
|
||||
|
||||
Set `CC` and `FC` to point to the cross toolchains, and set `HOSTCC` to your host C compiler.
|
||||
The target must be specified explicitly when cross compiling.
|
||||
Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly.
|
||||
|
||||
Examples:
|
||||
|
||||
* On an x86 box, compile this library for a loongson3a CPU:
|
||||
```sh
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
```
|
||||
On X86 box, compile this library for loongson3a CPU.
|
||||
|
||||
* On an x86 box, compile this library for a loongson3a CPU with loongcc (based on Open64) compiler:
|
||||
```sh
|
||||
make CC=loongcc FC=loongf95 HOSTCC=gcc TARGET=LOONGSON3A CROSS=1 CROSS_SUFFIX=mips64el-st-linux-gnu- NO_LAPACKE=1 NO_SHARED=1 BINARY=32
|
||||
```
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
|
||||
On X86 box, compile this library for loongson3a CPU with loongcc (based on Open64) compiler.
|
||||
|
||||
make CC=loongcc FC=loongf95 HOSTCC=gcc TARGET=LOONGSON3A CROSS=1 CROSS_SUFFIX=mips64el-st-linux-gnu- NO_LAPACKE=1 NO_SHARED=1 BINARY=32
|
||||
|
||||
### Debug version
|
||||
|
||||
A debug version can be built using `make DEBUG=1`.
|
||||
make DEBUG=1
|
||||
|
||||
### Compile with MASS support on Power CPU (optional)
|
||||
### Install to the directory (optional)
|
||||
|
||||
The [IBM MASS](https://www.ibm.com/support/home/product/W511326D80541V01/other_software/mathematical_acceleration_subsystem) library consists of a set of mathematical functions for C, C++, and Fortran applications that are tuned for optimum performance on POWER architectures.
|
||||
OpenBLAS with MASS requires a 64-bit, little-endian OS on POWER.
|
||||
The library can be installed as shown:
|
||||
Example:
|
||||
|
||||
* On Ubuntu:
|
||||
```sh
|
||||
wget -q http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add -
|
||||
echo "deb http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ trusty main" | sudo tee /etc/apt/sources.list.d/ibm-xl-compiler-eval.list
|
||||
sudo apt-get update
|
||||
sudo apt-get install libxlmass-devel.8.1.5
|
||||
```
|
||||
make install PREFIX=your_installation_directory
|
||||
|
||||
* On RHEL/CentOS:
|
||||
```sh
|
||||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/repodata/repomd.xml.key
|
||||
sudo rpm --import repomd.xml.key
|
||||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/ibm-xl-compiler-eval.repo
|
||||
sudo cp ibm-xl-compiler-eval.repo /etc/yum.repos.d/
|
||||
sudo yum install libxlmass-devel.8.1.5
|
||||
```
|
||||
The default directory is /opt/OpenBLAS
|
||||
|
||||
After installing the MASS library, compile OpenBLAS with `USE_MASS=1`.
|
||||
For example, to compile on Power8 with MASS support: `make USE_MASS=1 TARGET=POWER8`.
|
||||
## Support CPU & OS
|
||||
Please read GotoBLAS_01Readme.txt
|
||||
|
||||
### Install to a specific directory (optional)
|
||||
|
||||
Use `PREFIX=` when invoking `make`, for example
|
||||
|
||||
```sh
|
||||
make install PREFIX=your_installation_directory
|
||||
```
|
||||
|
||||
The default installation directory is `/opt/OpenBLAS`.
|
||||
|
||||
## Supported CPUs and Operating Systems
|
||||
|
||||
Please read `GotoBLAS_01Readme.txt`.
|
||||
|
||||
### Additional supported CPUs
|
||||
|
||||
#### x86/x86-64
|
||||
### Additional support CPU:
|
||||
|
||||
#### x86/x86-64:
|
||||
- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes.
|
||||
- **Intel Sandy Bridge**: Optimized Level-3 and Level-2 BLAS with AVX on x86-64.
|
||||
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64.
|
||||
- **Intel Skylake**: Optimized Level-3 and Level-2 BLAS with AVX512 and FMA on x86-64.
|
||||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
|
||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
|
||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
|
||||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD ZEN**: Uses Haswell codes with some optimizations.
|
||||
|
||||
#### MIPS64
|
||||
|
||||
#### MIPS64:
|
||||
- **ICT Loongson 3A**: Optimized Level-3 BLAS and part of Level-1,2.
|
||||
- **ICT Loongson 3B**: Experimental
|
||||
|
||||
#### ARM
|
||||
#### ARM:
|
||||
- **ARMV6**: Optimized BLAS for vfpv2 and vfpv3-d16 ( e.g. BCM2835, Cortex M0+ )
|
||||
- **ARMV7**: Optimized BLAS for vfpv3-d32 ( e.g. Cortex A8, A9 and A15 )
|
||||
|
||||
- **ARMv6**: Optimized BLAS for vfpv2 and vfpv3-d16 (e.g. BCM2835, Cortex M0+)
|
||||
- **ARMv7**: Optimized BLAS for vfpv3-d32 (e.g. Cortex A8, A9 and A15)
|
||||
|
||||
#### ARM64
|
||||
|
||||
- **ARMv8**: Experimental
|
||||
- **ARM Cortex-A57**: Experimental
|
||||
|
||||
#### PPC/PPC64
|
||||
|
||||
- **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1`
|
||||
- **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only.
|
||||
|
||||
#### IBM zEnterprise System
|
||||
|
||||
- **Z13**: Optimized Level-3 BLAS and Level-1,2 (double precision)
|
||||
- **Z14**: Optimized Level-3 BLAS and Level-1,2 (single precision)
|
||||
|
||||
### Supported OS
|
||||
#### ARM64:
|
||||
- **ARMV8**: Experimental
|
||||
|
||||
### Support OS:
|
||||
- **GNU/Linux**
|
||||
- **MinGW or Visual Studio (CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||
- **Darwin/macOS**: Experimental. Although GotoBLAS2 supports Darwin, we are not macOS experts.
|
||||
- **FreeBSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **OpenBSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **DragonFly BSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **Android**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
|
||||
- **MingWin/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are beginners on Mac OS X.
|
||||
- **FreeBSD**: Supported by community. We didn't test the library on this OS.
|
||||
|
||||
## Usage
|
||||
## Usages
|
||||
Link with libopenblas.a or -lopenblas for shared library.
|
||||
|
||||
Statically link with `libopenblas.a` or dynamically link with `-lopenblas` if OpenBLAS was
|
||||
compiled as a shared library.
|
||||
### Set the number of threads with environment variables.
|
||||
|
||||
### Setting the number of threads using environment variables
|
||||
Examples:
|
||||
|
||||
Environment variables are used to specify a maximum number of threads.
|
||||
For example,
|
||||
export OPENBLAS_NUM_THREADS=4
|
||||
|
||||
```sh
|
||||
export OPENBLAS_NUM_THREADS=4
|
||||
export GOTO_NUM_THREADS=4
|
||||
export OMP_NUM_THREADS=4
|
||||
```
|
||||
or
|
||||
|
||||
The priorities are `OPENBLAS_NUM_THREADS` > `GOTO_NUM_THREADS` > `OMP_NUM_THREADS`.
|
||||
export GOTO_NUM_THREADS=4
|
||||
|
||||
If you compile this library with `USE_OPENMP=1`, you should set the `OMP_NUM_THREADS`
|
||||
environment variable; OpenBLAS ignores `OPENBLAS_NUM_THREADS` and `GOTO_NUM_THREADS` when
|
||||
compiled with `USE_OPENMP=1`.
|
||||
or
|
||||
|
||||
### Setting the number of threads at runtime
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
We provide the following functions to control the number of threads at runtime:
|
||||
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
|
||||
```c
|
||||
void goto_set_num_threads(int num_threads);
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
```
|
||||
If you compile this lib with USE_OPENMP=1, you should set the OMP_NUM_THREADS environment variable. OpenBLAS ignores OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS with USE_OPENMP=1.
|
||||
|
||||
If you compile this library with `USE_OPENMP=1`, you should use the above functions too.
|
||||
### Set the number of threads on runtime.
|
||||
|
||||
## Reporting bugs
|
||||
We provide the functions below to control the number of threads at runtime.
|
||||
|
||||
Please submit an issue in https://github.com/xianyi/OpenBLAS/issues.
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should use the above functions, too.
|
||||
|
||||
## Report Bugs
|
||||
Please add an issue in https://github.com/xianyi/OpenBLAS/issues
|
||||
|
||||
## Contact
|
||||
|
||||
* OpenBLAS users mailing list: https://groups.google.com/forum/#!forum/openblas-users
|
||||
* OpenBLAS developers mailing list: https://groups.google.com/forum/#!forum/openblas-dev
|
||||
|
||||
## Change log
|
||||
|
||||
Please see Changelog.txt to view the differences between OpenBLAS and GotoBLAS2 1.13 BSD version.
|
||||
## ChangeLog
|
||||
Please see Changelog.txt to obtain the differences between OpenBLAS and GotoBLAS2 1.13 BSD version.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
* Please read the [FAQ](https://github.com/xianyi/OpenBLAS/wiki/Faq) first.
|
||||
* Please use GCC version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD.
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture.
|
||||
Clang 3.0 will generate the wrong AVX binary code.
|
||||
* Please use GCC version 6 or LLVM version 6 and above to compile Skylake AVX512 kernels.
|
||||
* The number of CPUs/cores should be less than or equal to 256. On Linux `x86_64` (`amd64`),
|
||||
there is experimental support for up to 1024 CPUs/cores and 128 numa nodes if you build
|
||||
the library with `BIGNUMA=1`.
|
||||
* OpenBLAS does not set processor affinity by default.
|
||||
On Linux, you can enable processor affinity by commenting out the line `NO_AFFINITY=1` in
|
||||
Makefile.rule. However, note that this may cause
|
||||
[a conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
|
||||
* On Loongson 3A, `make test` may fail with a `pthread_create` error (`EAGAIN`).
|
||||
However, it will be okay when you run the same test case on the shell.
|
||||
* Please read [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) at first.
|
||||
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
|
||||
* The number of CPUs/Cores should be less than or equal to 256. On Linux x86_64(amd64), there is experimental support for up to 1024 CPUs/Cores and 128 numa nodes if you build the library with BIGNUMA=1.
|
||||
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting the line NO_AFFINITY=1 in Makefile.rule. But this may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
|
||||
* On Loongson 3A, make test may fail because of a pthread_create error. The error code is EAGAIN. However, it will be OK when you run the same testcase on shell.
|
||||
|
||||
## Contributing
|
||||
|
||||
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue
|
||||
to start a discussion around a feature idea or a bug.
|
||||
2. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
|
||||
3. Write a test which shows that the bug was fixed or that the feature works as expected.
|
||||
4. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.
|
||||
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue to start a discussion around a feature idea or a bug.
|
||||
1. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
|
||||
1. Write a test which shows that the bug was fixed or that the feature works as expected.
|
||||
1. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.
|
||||
|
||||
## Donation
|
||||
|
||||
Please read [this wiki page](https://github.com/xianyi/OpenBLAS/wiki/Donation).
|
||||
|
||||
@@ -20,7 +20,6 @@ DUNNINGTON
|
||||
NEHALEM
|
||||
SANDYBRIDGE
|
||||
HASWELL
|
||||
SKYLAKEX
|
||||
ATOM
|
||||
|
||||
b)AMD CPU:
|
||||
@@ -35,7 +34,6 @@ BULLDOZER
|
||||
PILEDRIVER
|
||||
STEAMROLLER
|
||||
EXCAVATOR
|
||||
ZEN
|
||||
|
||||
c)VIA CPU:
|
||||
SSE_GENERIC
|
||||
@@ -48,7 +46,6 @@ POWER5
|
||||
POWER6
|
||||
POWER7
|
||||
POWER8
|
||||
POWER9
|
||||
PPCG4
|
||||
PPC970
|
||||
PPC970MP
|
||||
@@ -56,44 +53,26 @@ PPC440
|
||||
PPC440FP2
|
||||
CELL
|
||||
|
||||
3.MIPS CPU:
|
||||
P5600
|
||||
1004K
|
||||
|
||||
4.MIPS64 CPU:
|
||||
3.MIPS64 CPU:
|
||||
SICORTEX
|
||||
LOONGSON3A
|
||||
LOONGSON3B
|
||||
I6400
|
||||
P6600
|
||||
I6500
|
||||
|
||||
5.IA64 CPU:
|
||||
4.IA64 CPU:
|
||||
ITANIUM2
|
||||
|
||||
6.SPARC CPU:
|
||||
5.SPARC CPU:
|
||||
SPARC
|
||||
SPARCV7
|
||||
|
||||
7.ARM CPU:
|
||||
6.ARM CPU:
|
||||
CORTEXA15
|
||||
CORTEXA9
|
||||
ARMV7
|
||||
ARMV6
|
||||
ARMV5
|
||||
|
||||
8.ARM 64-bit CPU:
|
||||
7.ARM 64-bit CPU:
|
||||
ARMV8
|
||||
CORTEXA53
|
||||
CORTEXA57
|
||||
CORTEXA72
|
||||
CORTEXA73
|
||||
FALKOR
|
||||
THUNDERX
|
||||
THUNDERX2T99
|
||||
TSV110
|
||||
|
||||
9.System Z:
|
||||
ZARCH_GENERIC
|
||||
Z13
|
||||
Z14
|
||||
|
||||
213
USAGE.md
213
USAGE.md
@@ -1,213 +0,0 @@
|
||||
# Notes on OpenBLAS usage
|
||||
## Usage
|
||||
|
||||
#### Program is Terminated. Because you tried to allocate too many memory regions
|
||||
|
||||
In OpenBLAS, we manage a pool of memory buffers and allocate the number of
|
||||
buffers as the following.
|
||||
```
|
||||
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
|
||||
```
|
||||
This error indicates that the program exceeded the number of buffers.
|
||||
|
||||
Please build OpenBLAS with larger `NUM_THREADS`. For example, `make
|
||||
NUM_THREADS=32` or `make NUM_THREADS=64`. In `Makefile.system`, we will set
|
||||
`MAX_CPU_NUMBER=NUM_THREADS`.
|
||||
|
||||
Despite its name, and due to the use of memory buffers in functions like SGEMM,
|
||||
the setting of NUM_THREADS can be relevant even for a single-threaded build
|
||||
of OpenBLAS, if such functions get called by multiple threads of a program
|
||||
that uses OpenBLAS. In some cases, the affected code may simply crash or throw
|
||||
a segmentation fault without displaying the above warning first.
|
||||
|
||||
Note that the number of threads used at runtime can be altered to differ from the
|
||||
value NUM_THREADS was set to at build time. At runtime, the actual number of
|
||||
threads can be set anywhere from 1 to the build's NUM_THREADS (note however,
|
||||
that this does not change the number of memory buffers that will be allocated,
|
||||
which is set at build time). The number of threads for a process can be set by
|
||||
using the mechanisms described below.
|
||||
|
||||
|
||||
#### How can I use OpenBLAS in multi-threaded applications?
|
||||
|
||||
If your application is already multi-threaded, it will conflict with OpenBLAS
|
||||
multi-threading. Thus, you must set OpenBLAS to use a single thread in any of the
|
||||
following ways:
|
||||
|
||||
* `export OPENBLAS_NUM_THREADS=1` in the environment variables.
|
||||
* Call `openblas_set_num_threads(1)` in the application at runtime.
|
||||
* Build the single-threaded version of OpenBLAS, e.g. `make USE_THREAD=0`
|
||||
|
||||
If the application is parallelized by OpenMP, please use OpenBLAS built with
|
||||
`USE_OPENMP=1`
|
||||
|
||||
#### How to choose TARGET manually at runtime when compiled with DYNAMIC_ARCH
|
||||
|
||||
The environment variable which controls the kernel selection is
|
||||
`OPENBLAS_CORETYPE` (see `driver/others/dynamic.c`) e.g. `export
|
||||
OPENBLAS_CORETYPE=Haswell` and the function `char* openblas_get_corename()`
|
||||
returns the used target.
|
||||
|
||||
#### How could I disable OpenBLAS threading affinity at runtime?
|
||||
|
||||
You can define the `OPENBLAS_MAIN_FREE` or `GOTOBLAS_MAIN_FREE` environment
|
||||
variable to disable threading affinity on runtime. For example, before the
|
||||
running,
|
||||
```
|
||||
export OPENBLAS_MAIN_FREE=1
|
||||
```
|
||||
|
||||
Alternatively, you can disable the affinity feature by enabling `NO_AFFINITY=1`
|
||||
in `Makefile.rule`.
|
||||
|
||||
## Linking with the library
|
||||
|
||||
* Link with shared library
|
||||
|
||||
`gcc -o test test.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas`
|
||||
|
||||
If the library is multithreaded, please add `-lpthread`. If the library
|
||||
contains LAPACK functions, please add `-lgfortran` or other Fortran libs.
|
||||
|
||||
* Link with static library
|
||||
|
||||
`gcc -o test test.c /your/path/libopenblas.a`
|
||||
|
||||
You can download `test.c` from https://gist.github.com/xianyi/5780018
|
||||
|
||||
On Linux, if OpenBLAS was compiled with threading support (`USE_THREAD=1` by
|
||||
default), custom programs statically linked against `libopenblas.a` should also
|
||||
link with the pthread library e.g.:
|
||||
|
||||
```
|
||||
gcc -static -I/opt/OpenBLAS/include -L/opt/OpenBLAS/lib -o my_program my_program.c -lopenblas -lpthread
|
||||
```
|
||||
|
||||
Failing to add the `-lpthread` flag will cause errors such as:
|
||||
|
||||
```
|
||||
/opt/OpenBLAS/libopenblas.a(memory.o): In function `_touch_memory':
|
||||
memory.c:(.text+0x15): undefined reference to `pthread_mutex_lock'
|
||||
memory.c:(.text+0x41): undefined reference to `pthread_mutex_unlock'
|
||||
...
|
||||
```
|
||||
|
||||
## Code examples
|
||||
|
||||
#### Call CBLAS interface
|
||||
This example shows calling cblas_dgemm in C. https://gist.github.com/xianyi/6930656
|
||||
```
|
||||
#include <cblas.h>
|
||||
#include <stdio.h>
|
||||
|
||||
void main()
|
||||
{
|
||||
int i=0;
|
||||
double A[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
|
||||
double B[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
|
||||
double C[9] = {.5,.5,.5,.5,.5,.5,.5,.5,.5};
|
||||
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,3,3,2,1,A, 3, B, 3,2,C,3);
|
||||
|
||||
for(i=0; i<9; i++)
|
||||
printf("%lf ", C[i]);
|
||||
printf("\n");
|
||||
}
|
||||
```
|
||||
`gcc -o test_cblas_open test_cblas_dgemm.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas -lpthread -lgfortran`
|
||||
|
||||
#### Call BLAS Fortran interface
|
||||
|
||||
This example shows calling dgemm Fortran interface in C. https://gist.github.com/xianyi/5780018
|
||||
|
||||
```
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "sys/time.h"
|
||||
#include "time.h"
|
||||
|
||||
extern void dgemm_(char*, char*, int*, int*,int*, double*, double*, int*, double*, int*, double*, double*, int*);
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int i;
|
||||
printf("test!\n");
|
||||
if(argc<4){
|
||||
printf("Input Error\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
int m = atoi(argv[1]);
|
||||
int n = atoi(argv[2]);
|
||||
int k = atoi(argv[3]);
|
||||
int sizeofa = m * k;
|
||||
int sizeofb = k * n;
|
||||
int sizeofc = m * n;
|
||||
char ta = 'N';
|
||||
char tb = 'N';
|
||||
double alpha = 1.2;
|
||||
double beta = 0.001;
|
||||
|
||||
struct timeval start,finish;
|
||||
double duration;
|
||||
|
||||
double* A = (double*)malloc(sizeof(double) * sizeofa);
|
||||
double* B = (double*)malloc(sizeof(double) * sizeofb);
|
||||
double* C = (double*)malloc(sizeof(double) * sizeofc);
|
||||
|
||||
srand((unsigned)time(NULL));
|
||||
|
||||
for (i=0; i<sizeofa; i++)
|
||||
A[i] = i%3+1;//(rand()%100)/10.0;
|
||||
|
||||
for (i=0; i<sizeofb; i++)
|
||||
B[i] = i%3+1;//(rand()%100)/10.0;
|
||||
|
||||
for (i=0; i<sizeofc; i++)
|
||||
C[i] = i%3+1;//(rand()%100)/10.0;
|
||||
//#if 0
|
||||
printf("m=%d,n=%d,k=%d,alpha=%lf,beta=%lf,sizeofc=%d\n",m,n,k,alpha,beta,sizeofc);
|
||||
gettimeofday(&start, NULL);
|
||||
dgemm_(&ta, &tb, &m, &n, &k, &alpha, A, &m, B, &k, &beta, C, &m);
|
||||
gettimeofday(&finish, NULL);
|
||||
|
||||
duration = ((double)(finish.tv_sec-start.tv_sec)*1000000 + (double)(finish.tv_usec-start.tv_usec)) / 1000000;
|
||||
double gflops = 2.0 * m *n*k;
|
||||
gflops = gflops/duration*1.0e-6;
|
||||
|
||||
FILE *fp;
|
||||
fp = fopen("timeDGEMM.txt", "a");
|
||||
fprintf(fp, "%dx%dx%d\t%lf s\t%lf MFLOPS\n", m, n, k, duration, gflops);
|
||||
fclose(fp);
|
||||
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
` gcc -o time_dgemm time_dgemm.c /your/path/libopenblas.a`
|
||||
|
||||
` ./time_dgemm <m> <n> <k> `
|
||||
|
||||
## Troubleshooting
|
||||
* Please read [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) at first.
|
||||
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
|
||||
* The number of CPUs/Cores should be less than or equal to 256. On Linux x86_64(amd64), there is experimental support for up to 1024 CPUs/Cores and 128 numa nodes if you build the library with BIGNUMA=1.
|
||||
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting the line NO_AFFINITY=1 in Makefile.rule. But this may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
|
||||
* On Loongson 3A, make test may fail because of a pthread_create error. The error code is EAGAIN. However, it will be OK when you run the same testcase on shell.
|
||||
|
||||
## BLAS reference manual
|
||||
If you want to understand every BLAS function and definition, please read
|
||||
[Intel MKL reference manual](https://software.intel.com/sites/products/documentation/doclib/iss/2013/mkl/mklman/GUID-F7ED9FB8-6663-4F44-A62B-61B63C4F0491.htm)
|
||||
or [netlib.org](http://netlib.org/blas/)
|
||||
|
||||
Here are [OpenBLAS extension functions](https://github.com/xianyi/OpenBLAS/wiki/OpenBLAS-Extensions)
|
||||
|
||||
## How to reference OpenBLAS.
|
||||
|
||||
You can reference our [papers](https://github.com/xianyi/OpenBLAS/wiki/publications).
|
||||
|
||||
Alternatively, you can cite the OpenBLAS homepage http://www.openblas.net directly.
|
||||
|
||||
71
appveyor.yml
71
appveyor.yml
@@ -1,12 +1,10 @@
|
||||
version: 0.2.19.{build}
|
||||
version: 0.2.15.{build}
|
||||
|
||||
#environment:
|
||||
|
||||
platform:
|
||||
- x64
|
||||
|
||||
os: Visual Studio 2017
|
||||
|
||||
configuration: Release
|
||||
|
||||
clone_folder: c:\projects\OpenBLAS
|
||||
@@ -14,68 +12,31 @@ clone_folder: c:\projects\OpenBLAS
|
||||
init:
|
||||
- git config --global core.autocrlf input
|
||||
|
||||
build:
|
||||
project: OpenBLAS.sln
|
||||
|
||||
clone_depth: 5
|
||||
|
||||
#branches to build
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- develop
|
||||
- cmake
|
||||
|
||||
skip_tags: true
|
||||
|
||||
matrix:
|
||||
fast_finish: false
|
||||
fast_finish: true
|
||||
|
||||
skip_commits:
|
||||
# Add [av skip] to commit messages
|
||||
message: /\[av skip\]/
|
||||
|
||||
environment:
|
||||
global:
|
||||
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
|
||||
matrix:
|
||||
- COMPILER: clang-cl
|
||||
WITH_FORTRAN: yes
|
||||
- COMPILER: clang-cl
|
||||
DYNAMIC_ARCH: ON
|
||||
WITH_FORTRAN: no
|
||||
- COMPILER: cl
|
||||
- COMPILER: MinGW64-gcc-7.2.0-mingw
|
||||
DYNAMIC_ARCH: OFF
|
||||
WITH_FORTRAN: ignore
|
||||
- COMPILER: MinGW64-gcc-7.2.0
|
||||
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
|
||||
COMPILER: MinGW-gcc-5.3.0
|
||||
WITH_FORTRAN: ignore
|
||||
|
||||
install:
|
||||
- if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
|
||||
- if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
|
||||
- if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake
|
||||
|
||||
- if [%WITH_FORTRAN%]==[no] conda install --yes --quiet ninja
|
||||
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet -c isuruf kitware-ninja
|
||||
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flang
|
||||
|
||||
- if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
|
||||
- if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
|
||||
- if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
|
||||
|
||||
before_build:
|
||||
- ps: if (-Not (Test-Path .\build)) { mkdir build }
|
||||
- cd build
|
||||
- set PATH=%PATH:C:\Program Files\Git\usr\bin;=%
|
||||
- if [%COMPILER%]==[MinGW-gcc-5.3.0] set PATH=C:\MinGW\bin;C:\msys64\usr\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
|
||||
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] set PATH=C:\MinGW\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
|
||||
- if [%COMPILER%]==[MinGW64-gcc-7.2.0] set PATH=C:\msys64\usr\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
|
||||
- if [%COMPILER%]==[cl] cmake -G "Visual Studio 15 2017 Win64" ..
|
||||
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 ..
|
||||
- if [%COMPILER%]==[MinGW64-gcc-7.2.0] cmake -G "MSYS Makefiles" -DBINARY=32 -DNOFORTRAN=1 ..
|
||||
- if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
|
||||
- if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON ..
|
||||
- if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
|
||||
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' ..
|
||||
|
||||
build_script:
|
||||
- cmake --build .
|
||||
- echo Running cmake...
|
||||
- cd c:\projects\OpenBLAS
|
||||
- cmake -G "Visual Studio 12 Win64" .
|
||||
|
||||
test_script:
|
||||
- echo Running Test
|
||||
- cd utest
|
||||
- openblas_utest
|
||||
|
||||
- echo Build OK!
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
trigger:
|
||||
# start a new build for every push
|
||||
batch: False
|
||||
branches:
|
||||
include:
|
||||
- develop
|
||||
|
||||
jobs:
|
||||
# manylinux1 is useful to test because the
|
||||
# standard Docker container uses an old version
|
||||
# of gcc / glibc
|
||||
- job: manylinux1_gcc
|
||||
pool:
|
||||
vmImage: 'ubuntu-16.04'
|
||||
steps:
|
||||
- script: |
|
||||
echo "FROM quay.io/pypa/manylinux1_x86_64
|
||||
COPY . /tmp/openblas
|
||||
RUN cd /tmp/openblas && \
|
||||
COMMON_FLAGS='DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32' && \
|
||||
BTYPE='BINARY=64' CC=gcc && \
|
||||
make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE && \
|
||||
make -C test $COMMON_FLAGS $BTYPE && \
|
||||
make -C ctest $COMMON_FLAGS $BTYPE && \
|
||||
make -C utest $COMMON_FLAGS $BTYPE" > Dockerfile
|
||||
docker build .
|
||||
displayName: Run manylinux1 docker build
|
||||
- job: Intel_SDE_skx
|
||||
pool:
|
||||
vmImage: 'ubuntu-16.04'
|
||||
steps:
|
||||
- script: |
|
||||
# at the time of writing the available Azure Ubuntu vm image
|
||||
# does not support AVX512VL, so use more recent LTS version
|
||||
echo "FROM ubuntu:bionic
|
||||
COPY . /tmp/openblas
|
||||
RUN apt-get -y update && apt-get -y install \\
|
||||
cmake \\
|
||||
gfortran \\
|
||||
make \\
|
||||
wget
|
||||
RUN mkdir /tmp/SDE && cd /tmp/SDE && \\
|
||||
mkdir sde-external-8.35.0-2019-03-11-lin && \\
|
||||
wget --quiet -O sde-external-8.35.0-2019-03-11-lin.tar.bz2 https://www.dropbox.com/s/fopsnzj67572sj5/sde-external-8.35.0-2019-03-11-lin.tar.bz2?dl=0 && \\
|
||||
tar -xjvf sde-external-8.35.0-2019-03-11-lin.tar.bz2 -C /tmp/SDE/sde-external-8.35.0-2019-03-11-lin --strip-components=1
|
||||
RUN cd /tmp/openblas && CC=gcc make QUIET_MAKE=1 DYNAMIC_ARCH=1 NUM_THREADS=32 BINARY=64
|
||||
CMD cd /tmp/openblas && echo 0 > /proc/sys/kernel/yama/ptrace_scope && CC=gcc OPENBLAS_VERBOSE=2 /tmp/SDE/sde-external-8.35.0-2019-03-11-lin/sde64 -cpuid_in /tmp/SDE/sde-external-8.35.0-2019-03-11-lin/misc/cpuid/skx/cpuid.def -- make -C utest DYNAMIC_ARCH=1 NUM_THREADS=32 BINARY=64" > Dockerfile
|
||||
docker build -t intel_sde .
|
||||
# we need a privileged docker run for sde process attachment
|
||||
docker run --privileged intel_sde
|
||||
displayName: 'Run AVX512 SkylakeX docker build / test'
|
||||
@@ -33,22 +33,6 @@ LIBMKL = -L$(MKL) -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread
|
||||
# Apple vecLib
|
||||
LIBVECLIB = -framework Accelerate
|
||||
|
||||
ESSL=/opt/ibm/lib
|
||||
#LIBESSL = -lesslsmp $(ESSL)/libxlomp_ser.so.1 $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
|
||||
LIBESSL = -lesslsmp $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
|
||||
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
GOTO_LAPACK_TARGETS=slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
||||
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \
|
||||
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \
|
||||
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
|
||||
csymv.goto zsymv.goto \
|
||||
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
|
||||
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto
|
||||
else
|
||||
GOTO_LAPACK_TARGETS=
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
|
||||
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
||||
@@ -60,7 +44,6 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
||||
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
|
||||
sger.goto dger.goto cger.goto zger.goto \
|
||||
sdot.goto ddot.goto \
|
||||
srot.goto drot.goto \
|
||||
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
|
||||
scopy.goto dcopy.goto ccopy.goto zcopy.goto \
|
||||
sswap.goto dswap.goto cswap.goto zswap.goto \
|
||||
@@ -159,29 +142,31 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
||||
|
||||
else
|
||||
|
||||
goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
||||
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
||||
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \
|
||||
sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
||||
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \
|
||||
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \
|
||||
ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \
|
||||
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \
|
||||
sger.goto dger.goto cger.goto zger.goto \
|
||||
sdot.goto ddot.goto cdot.goto zdot.goto \
|
||||
srot.goto drot.goto \
|
||||
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \
|
||||
scopy.goto dcopy.goto ccopy.goto zcopy.goto \
|
||||
sswap.goto dswap.goto cswap.goto zswap.goto \
|
||||
sscal.goto dscal.goto cscal.goto zscal.goto \
|
||||
sasum.goto dasum.goto casum.goto zasum.goto \
|
||||
ssymv.goto dsymv.goto \
|
||||
ssymv.goto dsymv.goto csymv.goto zsymv.goto \
|
||||
chemv.goto zhemv.goto \
|
||||
chemm.goto zhemm.goto \
|
||||
cherk.goto zherk.goto \
|
||||
cher2k.goto zher2k.goto \
|
||||
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \
|
||||
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
|
||||
smallscaling \
|
||||
isamax.goto idamax.goto icamax.goto izamax.goto \
|
||||
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS)
|
||||
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \
|
||||
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
|
||||
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
|
||||
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \
|
||||
ssymm.goto dsymm.goto csymm.goto zsymm.goto
|
||||
|
||||
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
||||
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
|
||||
@@ -234,9 +219,7 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \
|
||||
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \
|
||||
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \
|
||||
spotrf.atlas dpotrf.atlas cpotrf.atlas zpotrf.atlas \
|
||||
ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas \
|
||||
isamax.atlas idamax.atlas icamax.atlas izamax.atlas \
|
||||
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto
|
||||
ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas
|
||||
|
||||
mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
||||
scholesky.mkl dcholesky.mkl ccholesky.mkl zcholesky.mkl \
|
||||
@@ -269,11 +252,7 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \
|
||||
|
||||
endif
|
||||
|
||||
essl :: sgemm.essl strmm.essl dgemm.essl dtrmm.essl \
|
||||
cgemm.essl ctrmm.essl zgemm.essl ztrmm.essl \
|
||||
slinpack.essl clinpack.essl dlinpack.essl zlinpack.essl \
|
||||
scholesky.essl ccholesky.essl dcholesky.essl zcholesky.essl \
|
||||
strsm.essl dtrsm.essl ctrsm.essl ztrsm.essl
|
||||
|
||||
|
||||
veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \
|
||||
scholesky.veclib dcholesky.veclib ccholesky.veclib zcholesky.veclib \
|
||||
@@ -326,9 +305,6 @@ slinpack.mkl : slinpack.$(SUFFIX)
|
||||
slinpack.veclib : slinpack.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
slinpack.essl : slinpack.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Dlinpack ####################################################
|
||||
dlinpack.goto : dlinpack.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
@@ -345,9 +321,6 @@ dlinpack.mkl : dlinpack.$(SUFFIX)
|
||||
dlinpack.veclib : dlinpack.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dlinpack.essl : dlinpack.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Clinpack ####################################################
|
||||
|
||||
clinpack.goto : clinpack.$(SUFFIX) ../$(LIBNAME)
|
||||
@@ -365,9 +338,6 @@ clinpack.mkl : clinpack.$(SUFFIX)
|
||||
clinpack.veclib : clinpack.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
clinpack.essl : clinpack.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Zlinpack ####################################################
|
||||
|
||||
zlinpack.goto : zlinpack.$(SUFFIX) ../$(LIBNAME)
|
||||
@@ -385,9 +355,6 @@ zlinpack.mkl : zlinpack.$(SUFFIX)
|
||||
zlinpack.veclib : zlinpack.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
zlinpack.essl : zlinpack.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Scholesky ###################################################
|
||||
|
||||
scholesky.goto : scholesky.$(SUFFIX) ../$(LIBNAME)
|
||||
@@ -405,9 +372,6 @@ scholesky.mkl : scholesky.$(SUFFIX)
|
||||
scholesky.veclib : scholesky.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
scholesky.essl : scholesky.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Dcholesky ###################################################
|
||||
|
||||
dcholesky.goto : dcholesky.$(SUFFIX) ../$(LIBNAME)
|
||||
@@ -425,9 +389,6 @@ dcholesky.mkl : dcholesky.$(SUFFIX)
|
||||
dcholesky.veclib : dcholesky.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dcholesky.essl : dcholesky.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Ccholesky ###################################################
|
||||
|
||||
ccholesky.goto : ccholesky.$(SUFFIX) ../$(LIBNAME)
|
||||
@@ -445,9 +406,6 @@ ccholesky.mkl : ccholesky.$(SUFFIX)
|
||||
ccholesky.veclib : ccholesky.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ccholesky.essl : ccholesky.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
|
||||
##################################### Zcholesky ###################################################
|
||||
|
||||
@@ -466,9 +424,6 @@ zcholesky.mkl : zcholesky.$(SUFFIX)
|
||||
zcholesky.veclib : zcholesky.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
zcholesky.essl : zcholesky.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Sgemm ####################################################
|
||||
sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
@@ -485,9 +440,6 @@ sgemm.mkl : sgemm.$(SUFFIX)
|
||||
sgemm.veclib : sgemm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
sgemm.essl : sgemm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Dgemm ####################################################
|
||||
dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
@@ -504,9 +456,6 @@ dgemm.mkl : dgemm.$(SUFFIX)
|
||||
dgemm.veclib : dgemm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dgemm.essl : dgemm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Cgemm ####################################################
|
||||
|
||||
cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME)
|
||||
@@ -524,9 +473,6 @@ cgemm.mkl : cgemm.$(SUFFIX)
|
||||
cgemm.veclib : cgemm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
cgemm.essl : cgemm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Zgemm ####################################################
|
||||
|
||||
zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME)
|
||||
@@ -544,9 +490,6 @@ zgemm.mkl : zgemm.$(SUFFIX)
|
||||
zgemm.veclib : zgemm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
zgemm.essl : zgemm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Ssymm ####################################################
|
||||
ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
@@ -629,9 +572,6 @@ strmm.mkl : strmm.$(SUFFIX)
|
||||
strmm.veclib : strmm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
strmm.essl : strmm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Dtrmm ####################################################
|
||||
dtrmm.goto : dtrmm.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
@@ -648,9 +588,6 @@ dtrmm.mkl : dtrmm.$(SUFFIX)
|
||||
dtrmm.veclib : dtrmm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dtrmm.essl : dtrmm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Ctrmm ####################################################
|
||||
|
||||
ctrmm.goto : ctrmm.$(SUFFIX) ../$(LIBNAME)
|
||||
@@ -668,9 +605,6 @@ ctrmm.mkl : ctrmm.$(SUFFIX)
|
||||
ctrmm.veclib : ctrmm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ctrmm.essl : ctrmm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Ztrmm ####################################################
|
||||
|
||||
ztrmm.goto : ztrmm.$(SUFFIX) ../$(LIBNAME)
|
||||
@@ -688,9 +622,6 @@ ztrmm.mkl : ztrmm.$(SUFFIX)
|
||||
ztrmm.veclib : ztrmm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ztrmm.essl : ztrmm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Strsm ####################################################
|
||||
strsm.goto : strsm.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
@@ -707,9 +638,6 @@ strsm.mkl : strsm.$(SUFFIX)
|
||||
strsm.veclib : strsm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
strsm.essl : strsm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Dtrsm ####################################################
|
||||
dtrsm.goto : dtrsm.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
@@ -726,9 +654,6 @@ dtrsm.mkl : dtrsm.$(SUFFIX)
|
||||
dtrsm.veclib : dtrsm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
dtrsm.essl : dtrsm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Ctrsm ####################################################
|
||||
|
||||
ctrsm.goto : ctrsm.$(SUFFIX) ../$(LIBNAME)
|
||||
@@ -746,9 +671,6 @@ ctrsm.mkl : ctrsm.$(SUFFIX)
|
||||
ctrsm.veclib : ctrsm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ctrsm.essl : ctrsm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Ztrsm ####################################################
|
||||
|
||||
ztrsm.goto : ztrsm.$(SUFFIX) ../$(LIBNAME)
|
||||
@@ -766,9 +688,6 @@ ztrsm.mkl : ztrsm.$(SUFFIX)
|
||||
ztrsm.veclib : ztrsm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
ztrsm.essl : ztrsm.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Ssyrk ####################################################
|
||||
ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
@@ -1493,39 +1412,6 @@ zdot.mkl : zdot-intel.$(SUFFIX)
|
||||
zdot.veclib : zdot-intel.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Srot ####################################################
|
||||
srot.goto : srot.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
srot.acml : srot.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
srot.atlas : srot.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
srot.mkl : srot.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
srot.veclib : srot.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Drot ####################################################
|
||||
drot.goto : drot.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
drot.acml : drot.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
drot.atlas : drot.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
drot.mkl : drot.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
drot.veclib : drot.$(SUFFIX)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
|
||||
##################################### Saxpy ####################################################
|
||||
saxpy.goto : saxpy.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
@@ -1947,63 +1833,6 @@ zgemm3m.mkl : zgemm3m.$(SUFFIX)
|
||||
zgemm3m.veclib : zgemm3m.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
############################################## ISAMAX ##############################################
|
||||
isamax.goto : isamax.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
isamax.atlas : isamax.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
############################################## IDAMAX ##############################################
|
||||
idamax.goto : idamax.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
idamax.atlas : idamax.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
############################################## ICAMAX ##############################################
|
||||
icamax.goto : icamax.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
icamax.atlas : icamax.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
############################################## IZAMAX ##############################################
|
||||
izamax.goto : izamax.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
izamax.atlas : izamax.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
############################################## SNRM2 ##############################################
|
||||
snrm2.goto : snrm2.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
snrm2.atlas : snrm2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
############################################## DNRM2 ##############################################
|
||||
dnrm2.goto : dnrm2.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
dnrm2.atlas : dnrm2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
############################################## Sscnrm2 ##############################################
|
||||
scnrm2.goto : scnrm2.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
scnrm2.atlas : scnrm2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
############################################## Ddznrm2 ##############################################
|
||||
dznrm2.goto : dznrm2.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
dznrm2.atlas : dznrm2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
|
||||
###################################################################################################
|
||||
|
||||
slinpack.$(SUFFIX) : linpack.c
|
||||
@@ -2294,13 +2123,6 @@ cgesv.$(SUFFIX) : gesv.c
|
||||
zgesv.$(SUFFIX) : gesv.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
srot.$(SUFFIX) : rot.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
drot.$(SUFFIX) : rot.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -2311,37 +2133,8 @@ zgemm3m.$(SUFFIX) : gemm3m.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
|
||||
isamax.$(SUFFIX) : iamax.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
idamax.$(SUFFIX) : iamax.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
icamax.$(SUFFIX) : iamax.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
izamax.$(SUFFIX) : iamax.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
|
||||
snrm2.$(SUFFIX) : nrm2.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
dnrm2.$(SUFFIX) : nrm2.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
scnrm2.$(SUFFIX) : nrm2.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
dznrm2.$(SUFFIX) : nrm2.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
|
||||
smallscaling: smallscaling.c ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(EXTRALIB) -fopenmp -lm -lpthread
|
||||
|
||||
clean ::
|
||||
@rm -f *.goto *.mkl *.acml *.atlas *.veclib *.essl smallscaling
|
||||
@rm -f *.goto *.mkl *.acml *.atlas *.veclib
|
||||
|
||||
include $(TOPDIR)/Makefile.tail
|
||||
|
||||
|
||||
@@ -183,9 +183,9 @@ int main(int argc, char *argv[]){
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
|
||||
fprintf(stderr, " %10.2f MFlops\n", 4. * (double)m / timeg * 1.e-6);
|
||||
#else
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
fprintf(stderr, " %10.2f MFlops\n", 2. * (double)m / timeg * 1.e-6);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
@@ -190,8 +190,8 @@ int main(int argc, char *argv[]){
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -190,8 +190,8 @@ int main(int argc, char *argv[]){
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
" %10.2f MBytes\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -184,8 +184,8 @@ int main(int argc, char *argv[]){
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
|
||||
138
benchmark/gemm.c
138
benchmark/gemm.c
@@ -121,15 +121,13 @@ static void *huge_malloc(BLASLONG size){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 0.0};
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
char transa = 'N';
|
||||
char transb = 'N';
|
||||
blasint m, n, k, i, j, lda, ldb, ldc;
|
||||
char trans='N';
|
||||
blasint m, n, i, j;
|
||||
int loops = 1;
|
||||
int has_param_m = 0;
|
||||
int has_param_n = 0;
|
||||
int has_param_k = 0;
|
||||
int has_param_n=0;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
@@ -137,108 +135,86 @@ int main(int argc, char *argv[]){
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1, timeg;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++; }
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; }
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++; }
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) {
|
||||
transa=*p;
|
||||
transb=*p;
|
||||
}
|
||||
if ((p = getenv("OPENBLAS_TRANSA"))) {
|
||||
transa=*p;
|
||||
}
|
||||
if ((p = getenv("OPENBLAS_TRANSB"))) {
|
||||
transb=*p;
|
||||
}
|
||||
TOUPPER(transa);
|
||||
TOUPPER(transb);
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step=%d : Transa=%c : Transb=%c\n", from, to, step, transa, transb);
|
||||
fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c\n", from, to, step, trans);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if ( p != NULL ) {
|
||||
loops = atoi(p);
|
||||
}
|
||||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
if ((p = getenv("OPENBLAS_PARAM_M"))) {
|
||||
m = atoi(p);
|
||||
has_param_m=1;
|
||||
} else {
|
||||
m = to;
|
||||
}
|
||||
if ((p = getenv("OPENBLAS_PARAM_N"))) {
|
||||
n = atoi(p);
|
||||
has_param_n=1;
|
||||
} else {
|
||||
n = to;
|
||||
}
|
||||
if ((p = getenv("OPENBLAS_PARAM_K"))) {
|
||||
k = atoi(p);
|
||||
has_param_k=1;
|
||||
} else {
|
||||
k = to;
|
||||
}
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * m * k * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * k * n * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
n = atoi(p);
|
||||
has_param_n=1;
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < m * k * COMPSIZE; i++) {
|
||||
a[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
for (i = 0; i < k * n * COMPSIZE; i++) {
|
||||
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
for (i = 0; i < m * n * COMPSIZE; i++) {
|
||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for (i = from; i <= to; i += step) {
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
if (!has_param_m) { m = i; }
|
||||
if (!has_param_n) { n = i; }
|
||||
if (!has_param_k) { k = i; }
|
||||
if ( has_param_n == 1 && n <= m )
|
||||
n=n;
|
||||
else
|
||||
n=m;
|
||||
|
||||
if (transa == 'N') { lda = m; }
|
||||
else { lda = k; }
|
||||
if (transb == 'N') { ldb = k; }
|
||||
else { ldb = n; }
|
||||
ldc = m;
|
||||
|
||||
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
for (j=0; j<loops; j++) {
|
||||
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
|
||||
}
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
|
||||
|
||||
}
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg = time1/loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)k * (double)m * (double)n / timeg * 1.e-6, time1);
|
||||
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -122,7 +122,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 0.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char trans='N';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
@@ -221,7 +221,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
}
|
||||
@@ -258,7 +258,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,192 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef IAMAX
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define IAMAX BLASFUNC(izamax)
|
||||
#else
|
||||
#define IAMAX BLASFUNC(icamax)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define IAMAX BLASFUNC(idamax)
|
||||
#else
|
||||
#define IAMAX BLASFUNC(isamax)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
IAMAX (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
193
benchmark/nrm2.c
193
benchmark/nrm2.c
@@ -1,193 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef NRM2
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define NRM2 BLASFUNC(dznrm2)
|
||||
#else
|
||||
#define NRM2 BLASFUNC(scnrm2)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define NRM2 BLASFUNC(dnrm2)
|
||||
#else
|
||||
#define NRM2 BLASFUNC(snrm2)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
NRM2 (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
197
benchmark/rot.c
197
benchmark/rot.c
@@ -1,197 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef DOT
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define ROT BLASFUNC(drot)
|
||||
#else
|
||||
#define ROT BLASFUNC(srot)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate "size" bytes from a SysV shared-memory segment requested with
 * SHM_HUGETLB.  The request is padded by HUGE_PAGESIZE and masked down to
 * a multiple of it.  The segment is marked for removal right after it is
 * attached.  Any failure prints a message and exits with status 1.
 */
static void *huge_malloc(BLASLONG size){
  void *address;
  int shmid;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT | 0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);
  if ((BLASLONG)address == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
// FLOAT result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
FLOAT c[1] = { 2.0 };
|
||||
FLOAT s[1] = { 2.0 };
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
ROT (&m, x, &inc_x, y, &inc_y, c, s);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -189,9 +189,9 @@ int main(int argc, char *argv[]){
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 6. * (double)m / timeg * 1.e-6, timeg);
|
||||
fprintf(stderr, " %10.2f MFlops\n", 6. * (double)m / timeg * 1.e-6);
|
||||
#else
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
fprintf(stderr, " %10.2f MFlops\n", 1. * (double)m / timeg * 1.e-6);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
@@ -2,47 +2,61 @@
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
if (!is.null(options("matprod")[[1]])) options(matprod = "blas")
|
||||
nfrom = 128
|
||||
nto = 2048
|
||||
nstep = 128
|
||||
loops = 1
|
||||
|
||||
nfrom <- 128
|
||||
nto <- 2048
|
||||
nstep <- 128
|
||||
loops <- 1
|
||||
if ( length(argv) > 0 ) {
|
||||
|
||||
for ( z in 1:length(argv) ) {
|
||||
|
||||
if ( z == 1 ) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if ( z==2 ) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if ( z==3 ) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if ( z==4 ) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
if (length(argv) > 0) {
|
||||
for (z in 1:length(argv)) {
|
||||
if (z == 1) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if (z == 2) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if (z == 3) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if (z == 4) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
p <- Sys.getenv("OPENBLAS_LOOPS")
|
||||
if (p != "") {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
||||
if ( p != "" ) {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n", nfrom, nto, nstep, loops))
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n <- nfrom
|
||||
while (n <= nto) {
|
||||
A <- matrix(rnorm(n * n), nrow = n)
|
||||
ev <- 0
|
||||
z <- system.time(for (l in 1:loops) {
|
||||
ev <- eigen(A)
|
||||
})
|
||||
n = nfrom
|
||||
while ( n <= nto ) {
|
||||
|
||||
mflops <- (26.66 * n * n * n) * loops / (z[3] * 1e+06)
|
||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
|
||||
l = 1
|
||||
|
||||
st <- sprintf("%.0fx%.0f :", n, n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
|
||||
start <- proc.time()[3]
|
||||
|
||||
while ( l <= loops ) {
|
||||
|
||||
ev <- eigen(A)
|
||||
l = l + 1
|
||||
}
|
||||
|
||||
end <- proc.time()[3]
|
||||
timeg = end - start
|
||||
mflops = (26.66 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
||||
|
||||
st = sprintf("%.0fx%.0f :",n , n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
||||
|
||||
n = n + nstep
|
||||
|
||||
n <- n + nstep
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -2,50 +2,62 @@
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
if (!is.null(options("matprod")[[1]])) options(matprod = "blas")
|
||||
nfrom = 128
|
||||
nto = 2048
|
||||
nstep = 128
|
||||
loops = 1
|
||||
|
||||
nfrom <- 128
|
||||
nto <- 2048
|
||||
nstep <- 128
|
||||
loops <- 1
|
||||
if ( length(argv) > 0 ) {
|
||||
|
||||
for ( z in 1:length(argv) ) {
|
||||
|
||||
if ( z == 1 ) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if ( z==2 ) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if ( z==3 ) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if ( z==4 ) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
if (length(argv) > 0) {
|
||||
for (z in 1:length(argv)) {
|
||||
if (z == 1) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if (z == 2) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if (z == 3) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if (z == 4) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
p <- Sys.getenv("OPENBLAS_LOOPS")
|
||||
if (p != "") {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
||||
if ( p != "" ) {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n", nfrom, nto, nstep, loops))
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n <- nfrom
|
||||
while (n <= nto) {
|
||||
A <- matrix(runif(n * n), nrow = n)
|
||||
B <- matrix(runif(n * n), nrow = n)
|
||||
C <- 1
|
||||
n = nfrom
|
||||
while ( n <= nto ) {
|
||||
|
||||
z <- system.time(for (l in 1:loops) {
|
||||
C <- A %*% B
|
||||
l <- l + 1
|
||||
})
|
||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
|
||||
l = 1
|
||||
|
||||
mflops <- (2.0 * n * n * n) * loops / (z[3] * 1e+06)
|
||||
start <- proc.time()[3]
|
||||
|
||||
st <- sprintf("%.0fx%.0f :", n, n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
|
||||
while ( l <= loops ) {
|
||||
|
||||
C <- A %*% B
|
||||
l = l + 1
|
||||
}
|
||||
|
||||
end <- proc.time()[3]
|
||||
timeg = end - start
|
||||
mflops = ( 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
||||
|
||||
st = sprintf("%.0fx%.0f :",n , n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
||||
|
||||
n = n + nstep
|
||||
|
||||
n <- n + nstep
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -2,48 +2,62 @@
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
if (!is.null(options("matprod")[[1]])) options(matprod = "blas")
|
||||
nfrom = 128
|
||||
nto = 2048
|
||||
nstep = 128
|
||||
loops = 1
|
||||
|
||||
nfrom <- 128
|
||||
nto <- 2048
|
||||
nstep <- 128
|
||||
loops <- 1
|
||||
if ( length(argv) > 0 ) {
|
||||
|
||||
for ( z in 1:length(argv) ) {
|
||||
|
||||
if ( z == 1 ) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if ( z==2 ) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if ( z==3 ) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if ( z==4 ) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
if (length(argv) > 0) {
|
||||
for (z in 1:length(argv)) {
|
||||
if (z == 1) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if (z == 2) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if (z == 3) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if (z == 4) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
p <- Sys.getenv("OPENBLAS_LOOPS")
|
||||
if (p != "") {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
||||
if ( p != "" ) {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n", nfrom, nto, nstep, loops))
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n <- nfrom
|
||||
while (n <= nto) {
|
||||
A <- matrix(rnorm(n * n), nrow = n)
|
||||
B <- matrix(rnorm(n * n), nrow = n)
|
||||
n = nfrom
|
||||
while ( n <= nto ) {
|
||||
|
||||
z <- system.time(for (l in 1:loops) {
|
||||
solve(A, B)
|
||||
})
|
||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
|
||||
l = 1
|
||||
|
||||
mflops <- (8.0 / 3 * n * n * n) * loops / (z[3] * 1e+06)
|
||||
start <- proc.time()[3]
|
||||
|
||||
st <- sprintf("%.0fx%.0f :", n, n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3]))
|
||||
while ( l <= loops ) {
|
||||
|
||||
solve(A,B)
|
||||
l = l + 1
|
||||
}
|
||||
|
||||
end <- proc.time()[3]
|
||||
timeg = end - start
|
||||
mflops = (2.0/3.0 *n*n*n + 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
||||
|
||||
st = sprintf("%.0fx%.0f :",n , n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
||||
|
||||
n = n + nstep
|
||||
|
||||
n <- n + nstep
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy import zeros
|
||||
from numpy.random import randn
|
||||
from scipy.linalg import blas
|
||||
|
||||
|
||||
def run_dsyrk(N, l):
    """Time ``l`` calls of ``blas.dsyrk`` on an N x N float64 matrix.

    The input is a random Fortran-ordered matrix and the result is
    accumulated into a zero-initialised Fortran-ordered matrix that is
    overwritten in place.  One line with the size, the rate (counting
    N**3 operations per call) and the total elapsed time is printed.
    """
    a_mat = randn(N, N).astype('float64', order='F')
    c_mat = zeros((N, N), dtype='float64', order='F')

    t_begin = time.time()
    for _ in range(l):
        blas.dsyrk(1.0, a_mat, c=c_mat, overwrite_c=True)
    elapsed = time.time() - t_begin

    mflops = (N * N * N) * l / elapsed * 1e-6

    print("%14s :\t%20f MFlops\t%20f sec" % ("%dx%d" % (N, N), mflops, elapsed))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions.
    N, NMAX, NINC, LOOPS = 128, 2048, 128, 1

    # Up to four positional arguments override the defaults in order;
    # anything after the fourth is ignored.
    cli = sys.argv[1:5]
    if len(cli) >= 1:
        N = int(cli[0])
    if len(cli) >= 2:
        NMAX = int(cli[1])
    if len(cli) >= 3:
        NINC = int(cli[2])
    if len(cli) >= 4:
        LOOPS = int(cli[3])

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    for size in range(N, NMAX + NINC, NINC):
        run_dsyrk(size, LOOPS)
|
||||
@@ -1,58 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy import zeros
|
||||
from numpy.random import randn
|
||||
from scipy.linalg import blas
|
||||
|
||||
|
||||
def run_ssyrk(N, l):
    """Time ``l`` calls of ``blas.ssyrk`` on an N x N float32 matrix.

    The input is a random Fortran-ordered matrix and the result is
    accumulated into a zero-initialised Fortran-ordered matrix that is
    overwritten in place.  One line with the size, the rate (counting
    N**3 operations per call) and the total elapsed time is printed.
    """
    a_mat = randn(N, N).astype('float32', order='F')
    c_mat = zeros((N, N), dtype='float32', order='F')

    t_begin = time.time()
    for _ in range(l):
        blas.ssyrk(1.0, a_mat, c=c_mat, overwrite_c=True)
    elapsed = time.time() - t_begin

    mflops = (N * N * N) * l / elapsed * 1e-6

    print("%14s :\t%20f MFlops\t%20f sec" % ("%dx%d" % (N, N), mflops, elapsed))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Defaults: first size, last size, size increment, repetitions.
    N, NMAX, NINC, LOOPS = 128, 2048, 128, 1

    # Up to four positional arguments override the defaults in order;
    # anything after the fourth is ignored.
    cli = sys.argv[1:5]
    if len(cli) >= 1:
        N = int(cli[0])
    if len(cli) >= 2:
        NMAX = int(cli[1])
    if len(cli) >= 3:
        NINC = int(cli[2])
    if len(cli) >= 4:
        LOOPS = int(cli[3])

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    for size in range(N, NMAX + NINC, NINC):
        run_ssyrk(size, LOOPS)
|
||||
@@ -1,197 +0,0 @@
|
||||
// run with OPENBLAS_NUM_THREADS=1 and OMP_NUM_THREADS=n
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
#include <cblas.h>
|
||||
#include <omp.h>
|
||||
#include <pthread.h>
|
||||
#define MIN_SIZE 5
|
||||
#define MAX_SIZE 60
|
||||
#define NB_SIZE 10
|
||||
|
||||
// Number of loops for a 1x1 matrix. Lower it if the test is
|
||||
// too slow on your computer.
|
||||
#define NLOOP 2e7
|
||||
|
||||
typedef struct {
|
||||
int matrix_size;
|
||||
int n_loop;
|
||||
void (* bench_func)();
|
||||
void (* blas_func)();
|
||||
void * (* create_matrix)(int size);
|
||||
} BenchParam;
|
||||
|
||||
/*
 * Allocate "size" floats and fill element i with 1e3 * i / size.
 * Exits with status 1 if the allocation fails.  The caller owns the
 * returned buffer and releases it with free().
 */
void * s_create_matrix(int size) {
    /* Sized from the element type so the buffer matches what is stored. */
    float * r = malloc(size * sizeof *r);
    int i;
    if (r == NULL && size > 0) {
        fprintf(stderr, "Out of Memory!!\n");
        exit(1);
    }
    for(i = 0; i < size; i++)
        r[i] = 1e3 * i / size;
    return r;
}
|
||||
|
||||
/*
 * Allocate 2 * size floats (two per element) and fill slot i with
 * 1e3 * i / size.  Exits with status 1 if the allocation fails.  The
 * caller owns the returned buffer and releases it with free().
 */
void * c_create_matrix(int size) {
    /* Sized from the element type so the buffer matches what is stored. */
    float * r = malloc(size * 2 * sizeof *r);
    int i;
    if (r == NULL && size > 0) {
        fprintf(stderr, "Out of Memory!!\n");
        exit(1);
    }
    for(i = 0; i < 2 * size; i++)
        r[i] = 1e3 * i / size;
    return r;
}
|
||||
|
||||
/*
 * Allocate 2 * size doubles (two per element) and fill slot i with
 * 1e3 * i / size.  Exits with status 1 if the allocation fails.  The
 * caller owns the returned buffer and releases it with free().
 */
void * z_create_matrix(int size) {
    double * r = malloc(size * 2 * sizeof *r);
    int i;
    if (r == NULL && size > 0) {
        fprintf(stderr, "Out of Memory!!\n");
        exit(1);
    }
    for(i = 0; i < 2 * size; i++)
        r[i] = 1e3 * i / size;
    return r;
}
|
||||
|
||||
/*
 * Allocate "size" doubles and fill element i with 1e3 * i / size.
 * Exits with status 1 if the allocation fails.  The caller owns the
 * returned buffer and releases it with free().
 */
void * d_create_matrix(int size) {
    double * r = malloc(size * sizeof *r);
    int i;
    if (r == NULL && size > 0) {
        fprintf(stderr, "Out of Memory!!\n");
        exit(1);
    }
    for(i = 0; i < size; i++)
        r[i] = 1e3 * i / size;
    return r;
}
|
||||
|
||||
/*
 * Run the TRMV-style routine stored in param->blas_func repeatedly on a
 * freshly created square matrix and vector of order param->matrix_size.
 * The call count is param->n_loop divided by the matrix order.
 */
void trmv_bench(BenchParam * param)
{
    int dim = param->matrix_size;
    int unit_stride = 1;
    int iterations = param->n_loop / dim;
    void * matrix = param->create_matrix(dim * dim);
    void * vector = param->create_matrix(dim);
    int k;

    for (k = 0; k < iterations; k++) {
        param->blas_func("U", "N", "N", &dim, matrix, &dim, vector, &unit_stride);
    }

    free(matrix);
    free(vector);
}
|
||||
|
||||
/*
 * Run the GEMV-style routine stored in param->blas_func repeatedly on a
 * freshly created square matrix and vector of order param->matrix_size.
 * The call count is param->n_loop divided by the matrix order.  The same
 * vector is passed as both the input and the output operand.
 */
void gemv_bench(BenchParam * param)
{
    int i, n;
    int size = param->matrix_size;
    n = param->n_loop / size;
    /* Two doubles so the scalar is valid for both the real routine
       (reads v[0] only) and the complex one (reads real and imaginary
       parts); a lone double would be read past its end by zgemv_. */
    double v[2] = { 1.01, 0.0 };
    int one = 1;
    void * A = param->create_matrix(size * size);
    void * y = param->create_matrix(size);
    for(i = 0; i < n; i++) {
        param->blas_func("N", &size, &size, v, A, &size, y, &one, v, y, &one);
    }
    free(A);
    free(y);
}
|
||||
|
||||
/*
 * Run the GER-style routine stored in param->blas_func repeatedly on a
 * freshly created square matrix and vector of order param->matrix_size.
 * The call count is param->n_loop divided by the matrix order.  The same
 * vector is used for both vector operands.
 */
void ger_bench(BenchParam * param) {
    int i, n;
    int size = param->matrix_size;
    n = param->n_loop / size;
    /* Two doubles so the scalar is valid for both the real routine
       (reads v[0] only) and the complex one (reads real and imaginary
       parts); a lone double would be read past its end by zgerc_. */
    double v[2] = { 1.01, 0.0 };
    int one = 1;
    void * A = param->create_matrix(size * size);
    void * y = param->create_matrix(size);
    for(i = 0; i < n; i++) {
        param->blas_func(&size, &size, v, y, &one, y, &one, A, &size);
    }
    free(A);
    free(y);
}
|
||||
|
||||
#ifndef _WIN32
|
||||
void * pthread_func_wrapper(void * param) {
|
||||
((BenchParam *)param)->bench_func(param);
|
||||
pthread_exit(NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
#define NB_TESTS 5
|
||||
void * TESTS[4 * NB_TESTS] = {
|
||||
trmv_bench, ztrmv_, z_create_matrix, "ztrmv",
|
||||
gemv_bench, dgemv_, d_create_matrix, "dgemv",
|
||||
gemv_bench, zgemv_, z_create_matrix, "zgemv",
|
||||
ger_bench, dger_, d_create_matrix, "dger",
|
||||
ger_bench, zgerc_, z_create_matrix, "zgerc",
|
||||
};
|
||||
|
||||
inline static double delta_time(struct timespec tick) {
|
||||
struct timespec tock;
|
||||
clock_gettime(CLOCK_MONOTONIC, &tock);
|
||||
return (tock.tv_sec - tick.tv_sec) + (tock.tv_nsec - tick.tv_nsec) / 1e9;
|
||||
}
|
||||
|
||||
/*
 * Time the benchmark described by *param when run on nb_threads POSIX
 * threads.  A private copy of the descriptor, with n_loop divided by the
 * thread count, is shared by all threads.  Returns the seconds from just
 * before the first thread is created until the last one is joined.
 * On _WIN32 builds nothing is run and 0 is returned.
 */
double pthread_bench(BenchParam * param, int nb_threads)
{
#ifdef _WIN32
    return 0;
#else
    BenchParam shared = *param;
    pthread_t workers[nb_threads];
    struct timespec begin;
    int idx;

    shared.n_loop /= nb_threads;
    clock_gettime(CLOCK_MONOTONIC, &begin);

    for (idx = 0; idx < nb_threads; idx++) {
        int status = pthread_create(&workers[idx], NULL, pthread_func_wrapper, &shared);
        if (status) {
            printf("ERROR; return code from pthread_create() is %d\n", status);
            exit(-1);
        }
    }

    for (idx = 0; idx < nb_threads; idx++) {
        pthread_join(workers[idx], NULL);
    }

    return delta_time(begin);
#endif
}
|
||||
|
||||
/*
 * Time one run of param->bench_func on the calling thread and return the
 * elapsed seconds.
 */
double seq_bench(BenchParam * param) {
    struct timespec begin;

    clock_gettime(CLOCK_MONOTONIC, &begin);
    param->bench_func(param);

    return delta_time(begin);
}
|
||||
|
||||
/*
 * Time the benchmark described by *param when the work is spread over an
 * OpenMP parallel loop with one iteration per available thread.  Every
 * iteration runs the routine on a shared copy of the descriptor whose
 * n_loop has been divided by the thread count.  Returns elapsed seconds.
 */
double omp_bench(BenchParam * param) {
    BenchParam shared = *param;
    int worker_count = omp_get_max_threads();
    struct timespec begin;
    int w;

    shared.n_loop /= worker_count;
    clock_gettime(CLOCK_MONOTONIC, &begin);

    #pragma omp parallel for
    for (w = 0; w < worker_count; w++) {
        param->bench_func(&shared);
    }

    return delta_time(begin);
}
|
||||
|
||||
int main(int argc, char * argv[]) {
|
||||
double inc_factor = exp(log((double)MAX_SIZE / MIN_SIZE) / NB_SIZE);
|
||||
BenchParam param;
|
||||
int test_id;
|
||||
printf ("Running on %d threads\n", omp_get_max_threads());
|
||||
for(test_id = 0; test_id < NB_TESTS; test_id ++) {
|
||||
double size = MIN_SIZE;
|
||||
param.bench_func = TESTS[test_id * 4];
|
||||
param.blas_func = TESTS[test_id * 4 + 1];
|
||||
param.create_matrix = TESTS[test_id * 4 + 2];
|
||||
printf("\nBenchmark of %s\n", (char*)TESTS[test_id * 4 + 3]);
|
||||
param.n_loop = NLOOP;
|
||||
while(size <= MAX_SIZE) {
|
||||
param.matrix_size = (int)(size + 0.5);
|
||||
double seq_time = seq_bench(¶m);
|
||||
double omp_time = omp_bench(¶m);
|
||||
double pthread_time = pthread_bench(¶m, omp_get_max_threads());
|
||||
printf("matrix size %d, sequential %gs, openmp %gs, speedup %g, "
|
||||
"pthread %gs, speedup %g\n",
|
||||
param.matrix_size, seq_time,
|
||||
omp_time, seq_time / omp_time,
|
||||
pthread_time, seq_time / pthread_time);
|
||||
size *= inc_factor;
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
@@ -190,8 +190,8 @@ int main(int argc, char *argv[]){
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
" %10.2f MBytes\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -191,8 +191,8 @@ int main(int argc, char *argv[]){
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6, time1);
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -184,8 +184,8 @@ int main(int argc, char *argv[]){
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
|
||||
108
c_check
108
c_check
@@ -1,8 +1,5 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
#use File::Basename;
|
||||
# use File::Temp qw(tempfile);
|
||||
|
||||
# Checking cross compile
|
||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
||||
@@ -10,9 +7,7 @@ $hostarch = "x86_64" if ($hostarch eq "amd64");
|
||||
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
|
||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
||||
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
|
||||
$hostarch = "zarch" if ($hostarch eq "s390x");
|
||||
|
||||
#$tmpf = new File::Temp( UNLINK => 1 );
|
||||
$binary = $ENV{"BINARY"};
|
||||
|
||||
$makefile = shift(@ARGV);
|
||||
@@ -31,24 +26,13 @@ if ($?) {
|
||||
|
||||
$cross_suffix = "";
|
||||
|
||||
eval "use File::Basename";
|
||||
if ($@){
|
||||
warn "could not load PERL module File::Basename, emulating its functionality";
|
||||
my $dirnam = substr($compiler_name, 0, rindex($compiler_name, "/")-1 );
|
||||
if ($dirnam ne ".") {
|
||||
$cross_suffix .= $dirnam . "/";
|
||||
}
|
||||
my $basnam = substr($compiler_name, rindex($compiler_name,"/")+1, length($compiler_name)-rindex($compiler_name,"/")-1);
|
||||
if ($basnam =~ /([^\s]*-)(.*)/) {
|
||||
$cross_suffix .= $1;
|
||||
if ($ARGV[0] =~ /(.*)(-[.\d]+)/) {
|
||||
if ($1 =~ /(.*-)(.*)/) {
|
||||
$cross_suffix = $1;
|
||||
}
|
||||
} else {
|
||||
if (dirname($compiler_name) ne ".") {
|
||||
$cross_suffix .= dirname($compiler_name) . "/";
|
||||
}
|
||||
|
||||
if (basename($compiler_name) =~ /([^\s]*-)(.*)/) {
|
||||
$cross_suffix .= $1;
|
||||
if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) {
|
||||
$cross_suffix = $1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -67,8 +51,6 @@ $compiler = GCC if ($compiler eq "");
|
||||
$os = Linux if ($data =~ /OS_LINUX/);
|
||||
$os = FreeBSD if ($data =~ /OS_FREEBSD/);
|
||||
$os = NetBSD if ($data =~ /OS_NETBSD/);
|
||||
$os = OpenBSD if ($data =~ /OS_OPENBSD/);
|
||||
$os = DragonFly if ($data =~ /OS_DRAGONFLY/);
|
||||
$os = Darwin if ($data =~ /OS_DARWIN/);
|
||||
$os = SunOS if ($data =~ /OS_SUNOS/);
|
||||
$os = AIX if ($data =~ /OS_AIX/);
|
||||
@@ -77,19 +59,17 @@ $os = WINNT if ($data =~ /OS_WINNT/);
|
||||
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/);
|
||||
$os = Interix if ($data =~ /OS_INTERIX/);
|
||||
$os = Android if ($data =~ /OS_ANDROID/);
|
||||
$os = Haiku if ($data =~ /OS_HAIKU/);
|
||||
|
||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
||||
$architecture = power if ($data =~ /ARCH_POWER/);
|
||||
$architecture = mips if ($data =~ /ARCH_MIPS/);
|
||||
$architecture = mips32 if ($data =~ /ARCH_MIPS32/);
|
||||
$architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
|
||||
|
||||
$defined = 0;
|
||||
|
||||
@@ -99,12 +79,7 @@ if ($os eq "AIX") {
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if ($architecture eq "mips") {
|
||||
$compiler_name .= " -mabi=32";
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if ($architecture eq "mips64") {
|
||||
if (($architecture eq "mips32") || ($architecture eq "mips64")) {
|
||||
$compiler_name .= " -mabi=n32" if ($binary eq "32");
|
||||
$compiler_name .= " -mabi=64" if ($binary eq "64");
|
||||
$defined = 1;
|
||||
@@ -114,11 +89,6 @@ if (($architecture eq "arm") || ($architecture eq "arm64")) {
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if ($architecture eq "zarch") {
|
||||
$defined = 1;
|
||||
$binary = 64;
|
||||
}
|
||||
|
||||
if ($architecture eq "alpha") {
|
||||
$defined = 1;
|
||||
$binary = 64;
|
||||
@@ -182,68 +152,20 @@ if ($?) {
|
||||
die 1;
|
||||
}
|
||||
|
||||
$have_msa = 0;
|
||||
if (($architecture eq "mips") || ($architecture eq "mips64")) {
|
||||
eval "use File::Temp qw(tempfile)";
|
||||
if ($@){
|
||||
warn "could not load PERL module File::Temp, so could not check MSA capatibility";
|
||||
} else {
|
||||
$tmpf = new File::Temp( UNLINK => 1 );
|
||||
$code = '"addvi.b $w0, $w1, 1"';
|
||||
$msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs";
|
||||
print $tmpf "#include <msa.h>\n\n";
|
||||
print $tmpf "void main(void){ __asm__ volatile($code); }\n";
|
||||
|
||||
$args = "$msa_flags -o $tmpf.o -x c $tmpf";
|
||||
my @cmd = ("$compiler_name $args");
|
||||
system(@cmd) == 0;
|
||||
if ($? != 0) {
|
||||
$have_msa = 0;
|
||||
} else {
|
||||
$have_msa = 1;
|
||||
}
|
||||
unlink("$tmpf.o");
|
||||
}
|
||||
}
|
||||
|
||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
||||
$architecture = power if ($data =~ /ARCH_POWER/);
|
||||
$architecture = mips if ($data =~ /ARCH_MIPS/);
|
||||
$architecture = mips32 if ($data =~ /ARCH_MIPS32/);
|
||||
$architecture = mips64 if ($data =~ /ARCH_MIPS64/);
|
||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/);
|
||||
$architecture = sparc if ($data =~ /ARCH_SPARC/);
|
||||
$architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
|
||||
|
||||
$binformat = bin32;
|
||||
$binformat = bin64 if ($data =~ /BINARY_64/);
|
||||
|
||||
$no_avx512= 0;
|
||||
if (($architecture eq "x86") || ($architecture eq "x86_64")) {
|
||||
eval "use File::Temp qw(tempfile)";
|
||||
if ($@){
|
||||
warn "could not load PERL module File::Temp, so could not check compiler compatibility with AVX512";
|
||||
$no_avx512 = 0;
|
||||
} else {
|
||||
# $tmpf = new File::Temp( UNLINK => 1 );
|
||||
($fh,$tmpf) = tempfile( UNLINK => 1 );
|
||||
$code = '"vbroadcastss -4 * 4(%rsi), %zmm2"';
|
||||
print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n";
|
||||
$args = " -march=skylake-avx512 -c -o $tmpf.o -x c $tmpf";
|
||||
my @cmd = ("$compiler_name $args >/dev/null 2>/dev/null");
|
||||
system(@cmd) == 0;
|
||||
if ($? != 0) {
|
||||
$no_avx512 = 1;
|
||||
} else {
|
||||
$no_avx512 = 0;
|
||||
}
|
||||
unlink("$tmpf.o");
|
||||
}
|
||||
}
|
||||
|
||||
$data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
|
||||
|
||||
$data =~ /globl\s([_\.]*)(.*)/;
|
||||
@@ -251,6 +173,7 @@ $data =~ /globl\s([_\.]*)(.*)/;
|
||||
$need_fu = $1;
|
||||
|
||||
$cross = 0;
|
||||
$cross = 1 if ($os ne $hostos);
|
||||
|
||||
if ($architecture ne $hostarch) {
|
||||
$cross = 1;
|
||||
@@ -258,8 +181,6 @@ if ($architecture ne $hostarch) {
|
||||
$cross = 0 if (($hostarch eq "mips64") && ($architecture eq "mips"));
|
||||
}
|
||||
|
||||
$cross = 1 if ($os ne $hostos);
|
||||
|
||||
$openmp = "" if $ENV{USE_OPENMP} != 1;
|
||||
|
||||
$linker_L = "";
|
||||
@@ -288,11 +209,6 @@ $linker_a = "";
|
||||
$linker_L .= "-Wl,". $flags . " "
|
||||
}
|
||||
|
||||
if ($flags =~ /^\--exclude-libs/) {
|
||||
$linker_L .= "-Wl,". $flags . " ";
|
||||
$flags="";
|
||||
}
|
||||
|
||||
if (
|
||||
($flags =~ /^\-l/)
|
||||
&& ($flags !~ /gfortranbegin/)
|
||||
@@ -327,12 +243,9 @@ print MAKEFILE "BINARY64=\n" if $binformat ne bin64;
|
||||
print MAKEFILE "BINARY32=1\n" if $binformat eq bin32;
|
||||
print MAKEFILE "BINARY64=1\n" if $binformat eq bin64;
|
||||
print MAKEFILE "FU=$need_fu\n" if $need_fu ne "";
|
||||
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne "";
|
||||
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross_suffix ne "";
|
||||
print MAKEFILE "CROSS=1\n" if $cross != 0;
|
||||
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
|
||||
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
|
||||
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
|
||||
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1;
|
||||
|
||||
# Upper-case the ASCII letters of the OS and architecture names.
# tr/// takes plain character ranges, not bracketed classes, so no [] here.
$os =~ tr/a-z/A-Z/;
$architecture =~ tr/a-z/A-Z/;
|
||||
@@ -344,7 +257,6 @@ print CONFFILE "#define C_$compiler\t1\n";
|
||||
print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32;
|
||||
print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
|
||||
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
|
||||
print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1;
|
||||
|
||||
if ($os eq "LINUX") {
|
||||
|
||||
|
||||
191
cblas.h
191
cblas.h
@@ -51,72 +51,51 @@ typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=1
|
||||
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||
typedef CBLAS_ORDER CBLAS_LAYOUT;
|
||||
|
||||
|
||||
float cblas_sdsdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
double cblas_dsdot (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
float cblas_sdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
double cblas_ddot(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
openblas_complex_float cblas_cdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_float cblas_cdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
|
||||
void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
|
||||
void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
|
||||
void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
|
||||
void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret);
|
||||
void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret);
|
||||
void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret);
|
||||
void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret);
|
||||
|
||||
float cblas_sasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
float cblas_ssum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dsum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
float cblas_scsum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dzsum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
float cblas_snrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dnrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
|
||||
float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dznrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX);
|
||||
float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dznrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
CBLAS_INDEX cblas_isamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
CBLAS_INDEX cblas_isamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
CBLAS_INDEX cblas_ismax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
CBLAS_INDEX cblas_ismin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idmin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icmin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izmin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
void cblas_saxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_sswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_srot(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s);
|
||||
void cblas_drot(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s);
|
||||
@@ -132,59 +111,59 @@ void cblas_drotmg(double *d1, double *d2, double *b1, OPENBLAS_CONST double b2,
|
||||
|
||||
void cblas_sscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_cscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_csscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zdscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_cscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_csscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zdscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_sgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_sger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_strsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_strmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_ssyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dsyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_ssyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo,OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X,
|
||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dsyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,
|
||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_sgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_cgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_ssbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
@@ -197,45 +176,45 @@ void cblas_stbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLA
|
||||
void cblas_dtbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_ssymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dsymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_chemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_chemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
|
||||
void cblas_sspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *Ap,
|
||||
@@ -246,36 +225,36 @@ void cblas_dspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLA
|
||||
void cblas_sspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *Ap);
|
||||
void cblas_dspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *Ap);
|
||||
|
||||
void cblas_chpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A);
|
||||
void cblas_zhpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST void *X,OPENBLAS_CONST blasint incX, void *A);
|
||||
void cblas_chpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A);
|
||||
void cblas_zhpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,OPENBLAS_CONST blasint incX, double *A);
|
||||
|
||||
void cblas_sspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A);
|
||||
void cblas_dspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A);
|
||||
void cblas_chpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *Ap);
|
||||
void cblas_zhpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *Ap);
|
||||
void cblas_chpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *Ap);
|
||||
void cblas_zhpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *Ap);
|
||||
|
||||
void cblas_chbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_chpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *Ap, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *Ap, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *Ap, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *Ap, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_cgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_cgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
|
||||
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
@@ -283,60 +262,60 @@ void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA
|
||||
void cblas_dsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_strmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_dtrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ctrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ztrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
|
||||
void cblas_strsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_dtrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ctrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ztrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
|
||||
void cblas_chemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zhemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_cherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_cher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
@@ -346,9 +325,9 @@ void cblas_saxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS
|
||||
|
||||
void cblas_daxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_somatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, OPENBLAS_CONST float *a,
|
||||
OPENBLAS_CONST blasint clda, float *b, OPENBLAS_CONST blasint cldb);
|
||||
|
||||
@@ -1,79 +0,0 @@
|
||||
# OpenBLASConfig.cmake
|
||||
# --------------------
|
||||
#
|
||||
# OpenBLAS cmake module.
|
||||
# This module sets the following variables in your project::
|
||||
#
|
||||
# OpenBLAS_FOUND - true if OpenBLAS and all required components found on the system
|
||||
# OpenBLAS_VERSION - OpenBLAS version in format Major.Minor.Release
|
||||
# OpenBLAS_INCLUDE_DIRS - Directory where OpenBLAS header is located.
|
||||
# OpenBLAS_INCLUDE_DIR - same as DIRS
|
||||
# OpenBLAS_LIBRARIES - OpenBLAS library to link against.
|
||||
# OpenBLAS_LIBRARY - same as LIBRARIES
|
||||
#
|
||||
#
|
||||
# Available components::
|
||||
#
|
||||
## shared - search for only shared library
|
||||
## static - search for only static library
|
||||
# serial - search for unthreaded library
|
||||
# pthread - search for native pthread threaded library
|
||||
# openmp - search for OpenMP threaded library
|
||||
#
|
||||
#
|
||||
# Exported targets::
|
||||
#
|
||||
# If OpenBLAS is found, this module defines the following :prop_tgt:`IMPORTED`
|
||||
## target. Target is shared _or_ static, so, for both, use separate, not
|
||||
## overlapping, installations. ::
|
||||
#
|
||||
# OpenBLAS::OpenBLAS - the main OpenBLAS library #with header & defs attached.
|
||||
#
|
||||
#
|
||||
# Suggested usage::
|
||||
#
|
||||
# find_package(OpenBLAS)
|
||||
# find_package(OpenBLAS 0.2.20 EXACT CONFIG REQUIRED COMPONENTS pthread)
|
||||
#
|
||||
#
|
||||
# The following variables can be set to guide the search for this package::
|
||||
#
|
||||
# OpenBLAS_DIR - CMake variable, set to directory containing this Config file
|
||||
# CMAKE_PREFIX_PATH - CMake variable, set to root directory of this package
|
||||
# PATH - environment variable, set to bin directory of this package
|
||||
# CMAKE_DISABLE_FIND_PACKAGE_OpenBLAS - CMake variable, disables
|
||||
# find_package(OpenBLAS) when not REQUIRED, perhaps to force internal build
|
||||
|
||||
@PACKAGE_INIT@
|
||||
|
||||
set(PN OpenBLAS)
|
||||
|
||||
# need to check that the @USE_*@ evaluate to something cmake can perform boolean logic upon
|
||||
if(@USE_OPENMP@)
|
||||
set(${PN}_openmp_FOUND 1)
|
||||
elseif(@USE_THREAD@)
|
||||
set(${PN}_pthread_FOUND 1)
|
||||
else()
|
||||
set(${PN}_serial_FOUND 1)
|
||||
endif()
|
||||
|
||||
check_required_components(${PN})
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Don't include targets if this file is being picked up by another
|
||||
# project which has already built this as a subproject
|
||||
#-----------------------------------------------------------------------------
|
||||
if(NOT TARGET ${PN}::OpenBLAS)
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/${PN}Targets.cmake")
|
||||
|
||||
get_property(_loc TARGET ${PN}::OpenBLAS PROPERTY LOCATION)
|
||||
set(${PN}_LIBRARY ${_loc})
|
||||
get_property(_ill TARGET ${PN}::OpenBLAS PROPERTY INTERFACE_LINK_LIBRARIES)
|
||||
set(${PN}_LIBRARIES ${_ill})
|
||||
|
||||
get_property(_id TARGET ${PN}::OpenBLAS PROPERTY INCLUDE_DIRECTORIES)
|
||||
set(${PN}_INCLUDE_DIR ${_id})
|
||||
get_property(_iid TARGET ${PN}::OpenBLAS PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
|
||||
set(${PN}_INCLUDE_DIRS ${_iid})
|
||||
endif()
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Sets various variables based on architecture.
|
||||
|
||||
if (X86 OR X86_64)
|
||||
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64")
|
||||
|
||||
if (X86)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
if (NOT BINARY)
|
||||
set(NO_BINARY_MODE 1)
|
||||
endif ()
|
||||
@@ -14,12 +14,12 @@ if (X86 OR X86_64)
|
||||
if (NOT NO_EXPRECISION)
|
||||
if (${F_COMPILER} MATCHES "GFORTRAN")
|
||||
# N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB")
|
||||
set(EXPRECISION 1)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
||||
endif ()
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "Clang")
|
||||
set(EXPRECISION 1)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
||||
@@ -28,61 +28,57 @@ if (X86 OR X86_64)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "Intel")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
|
||||
endif ()
|
||||
|
||||
if (USE_OPENMP)
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
# NO_AFFINITY = 1
|
||||
find_package(OpenMP REQUIRED)
|
||||
if (OpenMP_FOUND)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} ${OpenMP_C_FLAGS} -DUSE_OPENMP")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} ${OpenMP_Fortran_FLAGS}")
|
||||
endif()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "Clang")
|
||||
message(WARNING "Clang doesn't support OpenMP yet.")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "Intel")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "PGI")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "OPEN64")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||
set(CEXTRALIB "${CEXTRALIB} -lstdc++")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
if (ARM64)
|
||||
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99)
|
||||
endif ()
|
||||
|
||||
if (X86)
|
||||
set(DYNAMIC_CORE KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
|
||||
endif ()
|
||||
|
||||
if (X86_64)
|
||||
set(DYNAMIC_CORE PRESCOTT CORE2)
|
||||
if (DYNAMIC_OLDER)
|
||||
set (DYNAMIC_CORE ${DYNAMIC_CORE} PENRYN DUNNINGTON)
|
||||
endif ()
|
||||
set (DYNAMIC_CORE ${DYNAMIC_CORE} NEHALEM)
|
||||
if (DYNAMIC_OLDER)
|
||||
set (DYNAMIC_CORE ${DYNAMIC_CORE} OPTERON OPTERON_SSE3)
|
||||
endif ()
|
||||
set (DYNAMIC_CORE ${DYNAMIC_CORE} BARCELONA)
|
||||
if (DYNAMIC_OLDER)
|
||||
set (DYNAMIC_CORE ${DYNAMIC_CORE} BOBCAT ATOM NANO)
|
||||
endif ()
|
||||
if (${ARCH} STREQUAL "x86_64")
|
||||
set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
|
||||
if (NOT NO_AVX)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR)
|
||||
set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
|
||||
endif ()
|
||||
if (NOT NO_AVX2)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN)
|
||||
endif ()
|
||||
if (NOT NO_AVX512)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX)
|
||||
string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
|
||||
endif ()
|
||||
if (DYNAMIC_LIST)
|
||||
set(DYNAMIC_CORE PRESCOTT ${DYNAMIC_LIST})
|
||||
set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT DYNAMIC_CORE)
|
||||
message (STATUS "DYNAMIC_ARCH is not supported on this architecture, removing from options")
|
||||
unset(DYNAMIC_ARCH CACHE)
|
||||
unset(DYNAMIC_ARCH)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
@@ -91,14 +87,14 @@ if (${ARCH} STREQUAL "ia64")
|
||||
set(BINARY_DEFINED 1)
|
||||
|
||||
if (${F_COMPILER} MATCHES "GFORTRAN")
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "GNU")
|
||||
# EXPRECISION = 1
|
||||
# CCOMMON_OPT += -DEXPRECISION
|
||||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (MIPS64)
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
set(NO_BINARY_MODE 1)
|
||||
endif ()
|
||||
|
||||
@@ -107,12 +103,12 @@ if (${ARCH} STREQUAL "alpha")
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
if (ARM)
|
||||
if (${ARCH} STREQUAL "arm")
|
||||
set(NO_BINARY_MODE 1)
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
if (ARM64)
|
||||
if (${ARCH} STREQUAL "arm64")
|
||||
set(NO_BINARY_MODE 1)
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
89
cmake/c_check.cmake
Normal file
89
cmake/c_check.cmake
Normal file
@@ -0,0 +1,89 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from the OpenBLAS/c_check perl script.
|
||||
## This is triggered by prebuild.cmake and runs before any of the code is built.
|
||||
## Creates config.h and Makefile.conf.
|
||||
|
||||
# CMake vars set by this file:
|
||||
# OSNAME (use CMAKE_SYSTEM_NAME)
|
||||
# ARCH
|
||||
# C_COMPILER (use CMAKE_C_COMPILER)
|
||||
# BINARY32
|
||||
# BINARY64
|
||||
# FU
|
||||
# CROSS_SUFFIX
|
||||
# CROSS
|
||||
# CEXTRALIB
|
||||
|
||||
# Defines set by this file:
|
||||
# OS_
|
||||
# ARCH_
|
||||
# C_
|
||||
# __32BIT__
|
||||
# __64BIT__
|
||||
# FUNDERSCORE
|
||||
# PTHREAD_CREATE_FUNC
|
||||
|
||||
# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables.
|
||||
# FU is the value written to the FUNDERSCORE define further down:
# "_" when APPLE or MSVC is set, empty in every other case.
if(APPLE OR MSVC)
  set(FU "_")
else()
  set(FU "")
endif()
|
||||
|
||||
# Convert CMake vars into the format that OpenBLAS expects
|
||||
# HOST_OS is the upper-cased CMAKE_SYSTEM_NAME, with "WINDOWS" renamed to
# "WINNT". It is used below to build the OS_<name> define.
string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS)
if(${HOST_OS} STREQUAL "WINDOWS")
  set(HOST_OS WINNT)
endif()
|
||||
|
||||
# added by hpa - check size of void ptr to detect 64-bit compile
|
||||
# Respect a BINARY value that is already defined; otherwise derive it from
# the pointer size: 8-byte pointers mean 64, anything else means 32.
if(NOT DEFINED BINARY)
  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(BINARY 64)
  else()
    set(BINARY 32)
  endif()
endif()

# Set exactly one of the two flag variables.
if(NOT BINARY EQUAL 64)
  set(BINARY32 1)
else()
  set(BINARY64 1)
endif()
|
||||
|
||||
# CMake docs define these:
|
||||
# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for.
|
||||
# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on.
|
||||
#
|
||||
# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check
|
||||
# Start from the processor name CMake reports and map the "AMD64" spelling
# to "x86_64".
set(ARCH ${CMAKE_SYSTEM_PROCESSOR})
if(${ARCH} STREQUAL "AMD64")
  set(ARCH "x86_64")
endif()

# Two cases collapse to "x86":
#  - a 32-bit compiler on a 64-bit system, where CMAKE_SYSTEM_PROCESSOR
#    still reports x86_64;
#  - the upper-case spelling "X86".
if((${ARCH} STREQUAL "x86_64" AND BINARY EQUAL 32) OR ${ARCH} STREQUAL "X86")
  set(ARCH x86)
endif()
|
||||
|
||||
# COMPILER_ID feeds the C_<id> define written below, so it must describe the
# C compiler. The previous code read CMAKE_CXX_COMPILER_ID, which is empty
# when the CXX language is not enabled and can differ from the C compiler.
set(COMPILER_ID "${CMAKE_C_COMPILER_ID}")

# The define is spelled C_GCC, not C_GNU. The expansion is quoted so that an
# empty compiler id does not turn the condition into a syntax error.
if("${COMPILER_ID}" STREQUAL "GNU")
  set(COMPILER_ID "GCC")
endif()
|
||||
|
||||
# The ARCH_<name> define uses the upper-cased architecture name.
string(TOUPPER ${ARCH} UC_ARCH)

# (Re)create the configuration header with one define each for OS,
# architecture, compiler, bitness and the underscore setting.
# file(WRITE) concatenates all content arguments into the file.
file(WRITE ${TARGET_CONF}
  "#define OS_${HOST_OS}\t1\n"
  "#define ARCH_${UC_ARCH}\t1\n"
  "#define C_${COMPILER_ID}\t1\n"
  "#define __${BINARY}BIT__\t1\n"
  "#define FUNDERSCORE\t${FU}\n")
|
||||
|
||||
@@ -15,7 +15,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR
|
||||
|
||||
if (NO_BINARY_MODE)
|
||||
|
||||
if (MIPS64)
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
if (BINARY64)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64")
|
||||
else ()
|
||||
@@ -24,12 +24,17 @@ if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "LOONGSON3A" OR ${CORE} STREQUAL "LOONGSON3B")
|
||||
if (${CORE} STREQUAL "LOONGSON3A")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
|
||||
endif ()
|
||||
|
||||
if (CMAKE_SYSTEM_NAME STREQUAL "AIX")
|
||||
if (${CORE} STREQUAL "LOONGSON3B")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
|
||||
endif ()
|
||||
|
||||
if (${OSNAME} STREQUAL "AIX")
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
endif ()
|
||||
@@ -61,7 +66,7 @@ endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "OPEN64")
|
||||
|
||||
if (MIPS64)
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
|
||||
if (NOT BINARY64)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -n32")
|
||||
@@ -89,10 +94,10 @@ endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "SUN")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -w")
|
||||
if (X86)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
|
||||
else ()
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
||||
@@ -51,10 +51,9 @@ else()
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${PROJECT_BINARY_DIR}/openblas.def
|
||||
#TARGET ${OpenBLAS_LIBNAME} PRE_LINK
|
||||
TARGET ${OpenBLAS_LIBNAME} PRE_LINK
|
||||
COMMAND perl
|
||||
ARGS "${PROJECT_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
|
||||
ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
|
||||
COMMENT "Create openblas.def file"
|
||||
VERBATIM)
|
||||
|
||||
|
||||
@@ -20,6 +20,12 @@
|
||||
# NEEDBUNDERSCORE
|
||||
# NEED2UNDERSCORES
|
||||
|
||||
if (MSVC)
|
||||
# had to do this for MSVC, else CMake automatically assumes I have ifort... -hpa
|
||||
include(CMakeForceCompiler)
|
||||
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
|
||||
endif ()
|
||||
|
||||
if (NOT NO_LAPACK)
|
||||
enable_language(Fortran)
|
||||
else()
|
||||
@@ -28,13 +34,17 @@ else()
|
||||
endif()
|
||||
|
||||
if (NOT ONLY_CBLAS)
|
||||
# N.B. f_check is not cross-platform, so instead try to use CMake variables
|
||||
# run f_check (appends to TARGET files)
|
||||
# message(STATUS "Running f_check...")
|
||||
# execute_process(COMMAND perl f_check ${TARGET_MAKE} ${TARGET_CONF} ${CMAKE_Fortran_COMPILER}
|
||||
# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
# TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile
|
||||
# TODO: set FEXTRALIB flags a la f_check?
|
||||
|
||||
set(BU "_")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
file(APPEND ${TARGET_CONF}
|
||||
"#define BUNDERSCORE _\n"
|
||||
"#define NEEDBUNDERSCORE 1\n"
|
||||
"#define NEED2UNDERSCORES 0\n")
|
||||
@@ -46,7 +56,7 @@ else ()
|
||||
set(NO_FBLAS 1)
|
||||
#set(F_COMPILER GFORTRAN) # CMake handles the fortran compiler
|
||||
set(BU "_")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
file(APPEND ${TARGET_CONF}
|
||||
"#define BUNDERSCORE _\n"
|
||||
"#define NEEDBUNDERSCORE 1\n")
|
||||
endif()
|
||||
|
||||
@@ -3,21 +3,6 @@
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Sets Fortran related variables.
|
||||
|
||||
if (INTERFACE64)
|
||||
set(SUFFIX64 64)
|
||||
set(SUFFIX64_UNDERSCORE _64)
|
||||
endif()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "FLANG")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FLANG")
|
||||
if (BINARY64 AND INTERFACE64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
|
||||
endif ()
|
||||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "G77")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
|
||||
@@ -44,16 +29,13 @@ endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "GFORTRAN")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
|
||||
# ensure reentrancy of lapack codes
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -frecursive")
|
||||
# work around ABI violation in passing string arguments from C
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fno-optimize-sibling-calls")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
|
||||
# Don't include -lgfortran when NO_LAPACK=1 or lsbcc.
# The variable reference needs the leading '$'. Without it the literal text
# "{EXTRALIB}" was stored and any previously collected libraries were lost.
if (NOT NO_LAPACK)
  set(EXTRALIB "${EXTRALIB} -lgfortran")
endif ()
|
||||
if (NO_BINARY_MODE)
|
||||
if (MIPS64)
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
if (BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
|
||||
else ()
|
||||
@@ -133,7 +115,7 @@ if (${F_COMPILER} STREQUAL "PATHSCALE")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT MIPS64)
|
||||
if (NOT ${ARCH} STREQUAL "mips64")
|
||||
if (NOT BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
|
||||
else ()
|
||||
@@ -161,7 +143,7 @@ if (${F_COMPILER} STREQUAL "OPEN64")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (MIPS64)
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
|
||||
if (NOT BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -n32")
|
||||
@@ -192,7 +174,7 @@ endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "SUN")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN")
|
||||
if (X86)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# helper functions for the kernel CMakeLists.txt
|
||||
|
||||
|
||||
# Set the default filenames for L1 objects. Most of these will be overridden by the appropriate KERNEL file.
|
||||
# Set the default filenames for L1 objects. Most of these will be overriden by the appropriate KERNEL file.
|
||||
macro(SetDefaultL1)
|
||||
set(SAMAXKERNEL amax.S)
|
||||
set(DAMAXKERNEL amax.S)
|
||||
@@ -107,12 +107,6 @@ macro(SetDefaultL1)
|
||||
set(DAXPBYKERNEL ../arm/axpby.c)
|
||||
set(CAXPBYKERNEL ../arm/zaxpby.c)
|
||||
set(ZAXPBYKERNEL ../arm/zaxpby.c)
|
||||
set(SSUMKERNEL sum.S)
|
||||
set(DSUMKERNEL sum.S)
|
||||
set(CSUMKERNEL zsum.S)
|
||||
set(ZSUMKERNEL zsum.S)
|
||||
set(QSUMKERNEL sum.S)
|
||||
set(XSUMKERNEL zsum.S)
|
||||
endmacro ()
|
||||
|
||||
macro(SetDefaultL2)
|
||||
@@ -168,4 +162,4 @@ macro(SetDefaultL3)
|
||||
set(DGEADD_KERNEL ../generic/geadd.c)
|
||||
set(CGEADD_KERNEL ../generic/zgeadd.c)
|
||||
set(ZGEADD_KERNEL ../generic/zgeadd.c)
|
||||
endmacro ()
|
||||
endmacro ()
|
||||
@@ -1,485 +1,347 @@
|
||||
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.
|
||||
|
||||
set(ALLAUX ilaenv.f ilaenv2stage.f ieeeck.f lsamen.f iparmq.f iparam2stage.F
|
||||
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f
|
||||
../INSTALL/ilaver.f xerbla_array.f
|
||||
../INSTALL/slamch.f)
|
||||
set(ALLAUX
|
||||
ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f
|
||||
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f
|
||||
../INSTALL/ilaver.f ../INSTALL/slamch.f
|
||||
)
|
||||
|
||||
set(SCLAUX
|
||||
sbdsdc.f
|
||||
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
|
||||
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
|
||||
slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
|
||||
slagts.f slamrg.f slanst.f
|
||||
slapy2.f slapy3.f slarnv.f
|
||||
slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
|
||||
slarrk.f slarrr.f slaneg.f
|
||||
slartg.f slaruv.f slas2.f slascl.f
|
||||
slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
|
||||
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
|
||||
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
|
||||
slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
|
||||
ssteqr.f ssterf.f slaisnan.f sisnan.f
|
||||
slartgp.f slartgs.f
|
||||
../INSTALL/second_${TIMER}.f)
|
||||
sbdsdc.f
|
||||
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
|
||||
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
|
||||
slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
|
||||
slagts.f slamrg.f slanst.f
|
||||
slapy2.f slapy3.f slarnv.f
|
||||
slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
|
||||
slarrk.f slarrr.f slaneg.f
|
||||
slartg.f slaruv.f slas2.f slascl.f
|
||||
slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
|
||||
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
|
||||
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
|
||||
slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
|
||||
ssteqr.f ssterf.f slaisnan.f sisnan.f
|
||||
slartgp.f slartgs.f
|
||||
../INSTALL/second_${TIMER}.f
|
||||
)
|
||||
|
||||
set(DZLAUX
|
||||
dbdsdc.f
|
||||
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
|
||||
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
|
||||
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
|
||||
dlagts.f dlamrg.f dlanst.f
|
||||
dlapy2.f dlapy3.f dlarnv.f
|
||||
dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
|
||||
dlarrk.f dlarrr.f dlaneg.f
|
||||
dlartg.f dlaruv.f dlas2.f dlascl.f
|
||||
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
|
||||
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
|
||||
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
|
||||
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
|
||||
dsteqr.f dsterf.f dlaisnan.f disnan.f
|
||||
dlartgp.f dlartgs.f
|
||||
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f)
|
||||
dbdsdc.f
|
||||
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
|
||||
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
|
||||
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
|
||||
dlagts.f dlamrg.f dlanst.f
|
||||
dlapy2.f dlapy3.f dlarnv.f
|
||||
dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
|
||||
dlarrk.f dlarrr.f dlaneg.f
|
||||
dlartg.f dlaruv.f dlas2.f dlascl.f
|
||||
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
|
||||
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
|
||||
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
|
||||
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
|
||||
dsteqr.f dsterf.f dlaisnan.f disnan.f
|
||||
dlartgp.f dlartgs.f
|
||||
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
|
||||
)
|
||||
|
||||
set(SLASRC
|
||||
sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
|
||||
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
|
||||
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
|
||||
sgehd2.f sgehrd.f sgelq2.f sgelqf.f
|
||||
sgels.f sgelsd.f sgelss.f sgelsy.f sgeql2.f sgeqlf.f
|
||||
sgeqp3.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f sgerq2.f sgerqf.f
|
||||
sgesc2.f sgesdd.f sgesvd.f sgesvdx.f sgesvx.f sgetc2.f
|
||||
sgetrf2.f sgetri.f
|
||||
sggbak.f sggbal.f
|
||||
sgges.f sgges3.f sggesx.f sggev.f sggev3.f sggevx.f
|
||||
sggglm.f sgghrd.f sgghd3.f sgglse.f sggqrf.f
|
||||
sggrqf.f sggsvd3.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f
|
||||
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
|
||||
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
|
||||
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
|
||||
slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
|
||||
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
|
||||
slansy.f slantb.f slantp.f slantr.f slanv2.f
|
||||
slapll.f slapmt.f
|
||||
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
|
||||
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
|
||||
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
|
||||
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
|
||||
slarrv.f slartv.f
|
||||
slarz.f slarzb.f slarzt.f slasy2.f
|
||||
slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f
|
||||
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f
|
||||
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
|
||||
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
|
||||
sorgrq.f sorgtr.f sorm2l.f sorm2r.f sorm22.f
|
||||
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
|
||||
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
|
||||
spbstf.f spbsv.f spbsvx.f
|
||||
spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
|
||||
sposvx.f spotrf2.f spotri.f spstrf.f spstf2.f
|
||||
sppcon.f sppequ.f
|
||||
spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
|
||||
spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
|
||||
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
|
||||
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
|
||||
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
|
||||
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
|
||||
sstevx.f ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
|
||||
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
|
||||
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
|
||||
ssyswapr.f ssytrs.f ssytrs2.f
|
||||
ssyconv.f ssyconvf.f ssyconvf_rook.f
|
||||
ssysv_aa.f ssysv_aa_2stage.f ssytrf_aa.f ssytrf_aa_2stage.f ssytrs_aa.f ssytrs_aa_2stage.f
|
||||
ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
|
||||
ssytri_rook.f ssycon_rook.f ssysv_rook.f
|
||||
ssytf2_rk.f ssytrf_rk.f ssytrs_3.f
|
||||
ssytri_3.f ssytri_3x.f ssycon_3.f ssysv_rk.f
|
||||
ssysv_aa.f ssytrf_aa.f ssytrs_aa.f
|
||||
stbcon.f
|
||||
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
|
||||
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
|
||||
stptrs.f
|
||||
strcon.f strevc.f strevc3.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
|
||||
strtrs.f stzrzf.f sstemr.f
|
||||
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
|
||||
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
|
||||
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
|
||||
sgeequb.f ssyequb.f spoequb.f sgbequb.f
|
||||
sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
|
||||
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
|
||||
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
|
||||
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f
|
||||
sgelqt.f sgelqt3.f sgemlqt.f
|
||||
sgetsls.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f
|
||||
sgelq.f slaswlq.f slamswlq.f sgemlq.f
|
||||
stplqt.f stplqt2.f stpmlqt.f
|
||||
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
|
||||
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
|
||||
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f)
|
||||
|
||||
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
|
||||
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
|
||||
sla_syrfsx_extended.f sla_syamv.f sla_syrcond.f sla_syrpvgrw.f
|
||||
sposvxx.f sporfsx.f sla_porfsx_extended.f sla_porcond.f
|
||||
sla_porpvgrw.f sgbsvxx.f sgbrfsx.f sla_gbrfsx_extended.f
|
||||
sla_gbamv.f sla_gbrcond.f sla_gbrpvgrw.f sla_lin_berr.f slarscl2.f
|
||||
slascl2.f sla_wwaddw.f)
|
||||
|
||||
set(CLASRC
|
||||
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
|
||||
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
|
||||
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
|
||||
cgehd2.f cgehrd.f cgelq2.f cgelqf.f
|
||||
cgels.f cgelsd.f cgelss.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
|
||||
cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f cgerq2.f cgerqf.f
|
||||
cgesc2.f cgesdd.f cgesvd.f cgesvdx.f
|
||||
cgesvj.f cgejsv.f cgsvj0.f cgsvj1.f
|
||||
cgesvx.f cgetc2.f cgetrf2.f
|
||||
cgetri.f
|
||||
cggbak.f cggbal.f
|
||||
cgges.f cgges3.f cggesx.f cggev.f cggev3.f cggevx.f
|
||||
cggglm.f cgghrd.f cgghd3.f cgglse.f cggqrf.f cggrqf.f
|
||||
cggsvd3.f cggsvp3.f
|
||||
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
|
||||
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
|
||||
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
|
||||
chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
|
||||
chetf2.f chetrd.f
|
||||
chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
|
||||
chetrs.f chetrs2.f
|
||||
chetf2_rook.f chetrf_rook.f chetri_rook.f
|
||||
chetrs_rook.f checon_rook.f chesv_rook.f
|
||||
chetf2_rk.f chetrf_rk.f chetri_3.f chetri_3x.f
|
||||
chetrs_3.f checon_3.f chesv_rk.f
|
||||
chesv_aa.f chesv_aa_2stage.f chetrf_aa.f chetrf_aa_2stage.f chetrs_aa.f chetrs_aa_2stage.f
|
||||
chgeqz.f chpcon.f chpev.f chpevd.f
|
||||
chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
|
||||
chpsvx.f
|
||||
chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
|
||||
clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
|
||||
claed0.f claed7.f claed8.f
|
||||
claein.f claesy.f claev2.f clags2.f clagtm.f
|
||||
clahef.f clahef_rook.f clahef_rk.f clahef_aa.f clahqr.f
|
||||
clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
|
||||
clanhb.f clanhe.f
|
||||
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
|
||||
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
|
||||
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
|
||||
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
|
||||
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
|
||||
clarf.f clarfb.f clarfg.f clarfgp.f clarft.f
|
||||
clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
|
||||
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
|
||||
clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f
|
||||
clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
|
||||
cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
|
||||
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
|
||||
cposv.f cposvx.f cpotrf2.f cpotri.f cpstrf.f cpstf2.f
|
||||
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
|
||||
cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
|
||||
crot.f cspcon.f csprfs.f cspsv.f
|
||||
cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
|
||||
cstegr.f cstein.f csteqr.f csycon.f
|
||||
csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f
|
||||
csytri2.f csytri2x.f csyswapr.f
|
||||
csytrs.f csytrs2.f
|
||||
csyconv.f csyconvf.f csyconvf_rook.f
|
||||
csytf2_rook.f csytrf_rook.f csytrs_rook.f
|
||||
csytri_rook.f csycon_rook.f csysv_rook.f
|
||||
csytf2_rk.f csytrf_rk.f csytrf_aa.f csytrf_aa_2stage.f csytrs_3.f csytrs_aa.f csytrs_aa_2stage.f
|
||||
csytri_3.f csytri_3x.f csycon_3.f csysv_rk.f csysv_aa.f csysv_aa_2stage.f
|
||||
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
|
||||
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
|
||||
ctprfs.f ctptri.f
|
||||
ctptrs.f ctrcon.f ctrevc.f ctrevc3.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
|
||||
ctrsyl.f ctrtrs.f ctzrzf.f cung2l.f cung2r.f
|
||||
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
|
||||
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f cunm22.f
|
||||
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
|
||||
cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
|
||||
chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
|
||||
ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
|
||||
cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
|
||||
cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
|
||||
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
|
||||
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
|
||||
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f
|
||||
cgelqt.f cgelqt3.f cgemlqt.f
|
||||
cgetsls.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f
|
||||
cgelq.f claswlq.f clamswlq.f cgemlq.f
|
||||
ctplqt.f ctplqt2.f ctpmlqt.f
|
||||
chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f
|
||||
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
|
||||
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f)
|
||||
|
||||
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
|
||||
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
|
||||
csysvxx.f csyrfsx.f cla_syrfsx_extended.f cla_syamv.f
|
||||
cla_syrcond_c.f cla_syrcond_x.f cla_syrpvgrw.f
|
||||
cposvxx.f cporfsx.f cla_porfsx_extended.f
|
||||
cla_porcond_c.f cla_porcond_x.f cla_porpvgrw.f
|
||||
cgbsvxx.f cgbrfsx.f cla_gbrfsx_extended.f cla_gbamv.f
|
||||
cla_gbrcond_c.f cla_gbrcond_x.f cla_gbrpvgrw.f
|
||||
chesvxx.f cherfsx.f cla_herfsx_extended.f cla_heamv.f
|
||||
cla_hercond_c.f cla_hercond_x.f cla_herpvgrw.f
|
||||
cla_lin_berr.f clarscl2.f clascl2.f cla_wwaddw.f)
|
||||
|
||||
set(DLASRC
|
||||
dbdsvdx.f dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
|
||||
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
|
||||
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
|
||||
dgehd2.f dgehrd.f dgelq2.f dgelqf.f
|
||||
dgels.f dgelsd.f dgelss.f dgelsy.f dgeql2.f dgeqlf.f
|
||||
dgeqp3.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f dgerq2.f dgerqf.f
|
||||
dgesc2.f dgesdd.f dgesvd.f dgesvdx.f dgesvx.f dgetc2.f
|
||||
dgetrf2.f dgetri.f
|
||||
dggbak.f dggbal.f
|
||||
dgges.f dgges3.f dggesx.f dggev.f dggev3.f dggevx.f
|
||||
dggglm.f dgghrd.f dgghd3.f dgglse.f dggqrf.f
|
||||
dggrqf.f dggsvd3.f dggsvp3.f dgtcon.f dgtrfs.f dgtsv.f
|
||||
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
|
||||
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
|
||||
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
|
||||
dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
|
||||
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
|
||||
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
|
||||
dlapll.f dlapmt.f
|
||||
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
|
||||
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
|
||||
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
|
||||
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
|
||||
dlargv.f dlarrv.f dlartv.f
|
||||
dlarz.f dlarzb.f dlarzt.f dlasy2.f
|
||||
dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f
|
||||
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f
|
||||
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
|
||||
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
|
||||
dorgrq.f dorgtr.f dorm2l.f dorm2r.f dorm22.f
|
||||
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
|
||||
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
|
||||
dpbstf.f dpbsv.f dpbsvx.f
|
||||
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
|
||||
dposvx.f dpotrf2.f dpotri.f dpotrs.f dpstrf.f dpstf2.f
|
||||
dppcon.f dppequ.f
|
||||
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
|
||||
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
|
||||
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
|
||||
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
|
||||
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
|
||||
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
|
||||
dstevx.f dsycon.f dsyev.f dsyevd.f dsyevr.f
|
||||
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
|
||||
dsysv.f dsysvx.f
|
||||
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytrs.f dsytrs2.f
|
||||
dsytri2.f dsytri2x.f dsyswapr.f
|
||||
dsyconv.f dsyconvf.f dsyconvf_rook.f
|
||||
dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
|
||||
dsytri_rook.f dsycon_rook.f dsysv_rook.f
|
||||
dsytf2_rk.f dsytrf_rk.f dsytrs_3.f
|
||||
dsytri_3.f dsytri_3x.f dsycon_3.f dsysv_rk.f
|
||||
dsysv_aa.f dsysv_aa_2stage.f dsytrf_aa.f dsytrf_aa_2stage.f dsytrs_aa.f dsytrs_aa_2stage.f
|
||||
dtbcon.f
|
||||
dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
|
||||
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
|
||||
dtptrs.f
|
||||
dtrcon.f dtrevc.f dtrevc3.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
|
||||
dtrtrs.f dtzrzf.f dstemr.f
|
||||
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
|
||||
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
|
||||
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
|
||||
dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f
|
||||
dgeequb.f dsyequb.f dpoequb.f dgbequb.f
|
||||
dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
|
||||
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
|
||||
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
|
||||
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f
|
||||
dgelqt.f dgelqt3.f dgemlqt.f
|
||||
dgetsls.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f
|
||||
dgelq.f dlaswlq.f dlamswlq.f dgemlq.f
|
||||
dtplqt.f dtplqt2.f dtpmlqt.f
|
||||
dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f
|
||||
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
|
||||
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f)
|
||||
|
||||
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
|
||||
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
|
||||
dla_syrfsx_extended.f dla_syamv.f dla_syrcond.f dla_syrpvgrw.f
|
||||
dposvxx.f dporfsx.f dla_porfsx_extended.f dla_porcond.f
|
||||
dla_porpvgrw.f dgbsvxx.f dgbrfsx.f dla_gbrfsx_extended.f
|
||||
dla_gbamv.f dla_gbrcond.f dla_gbrpvgrw.f dla_lin_berr.f dlarscl2.f
|
||||
dlascl2.f dla_wwaddw.f)
|
||||
|
||||
set(ZLASRC
|
||||
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
|
||||
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
|
||||
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
|
||||
zgehd2.f zgehrd.f zgelq2.f zgelqf.f
|
||||
zgels.f zgelsd.f zgelss.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
|
||||
zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
|
||||
zgesc2.f zgesdd.f zgesvd.f zgesvdx.f zgesvx.f
|
||||
zgesvj.f zgejsv.f zgsvj0.f zgsvj1.f
|
||||
zgetc2.f zgetrf2.f
|
||||
zgetri.f
|
||||
zggbak.f zggbal.f
|
||||
zgges.f zgges3.f zggesx.f zggev.f zggev3.f zggevx.f
|
||||
zggglm.f zgghrd.f zgghd3.f zgglse.f zggqrf.f zggrqf.f
|
||||
zggsvd3.f zggsvp3.f
|
||||
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
|
||||
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
|
||||
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
|
||||
zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f
|
||||
zhetf2.f zhetrd.f
|
||||
zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
|
||||
zhetrs.f zhetrs2.f
|
||||
zhetf2_rook.f zhetrf_rook.f zhetri_rook.f
|
||||
zhetrs_rook.f zhecon_rook.f zhesv_rook.f
|
||||
zhetf2_rk.f zhetrf_rk.f zhetri_3.f zhetri_3x.f
|
||||
zhetrs_3.f zhecon_3.f zhesv_rk.f
|
||||
zhesv_aa.f zhesv_aa_2stage.f zhetrf_aa.f zhetrf_aa_2stage.f zhetrs_aa.f zhetrs_aa_2stage.f
|
||||
zhgeqz.f zhpcon.f zhpev.f zhpevd.f
|
||||
zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
|
||||
zhpsvx.f
|
||||
zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
|
||||
zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
|
||||
zlaed0.f zlaed7.f zlaed8.f
|
||||
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
|
||||
zlahef.f zlahef_rook.f zlahef_rk.f zlahef_aa.f zlahqr.f
|
||||
zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
|
||||
zlangt.f zlanhb.f
|
||||
zlanhe.f
|
||||
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
|
||||
zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
|
||||
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
|
||||
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
|
||||
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
|
||||
zlarcm.f zlarf.f zlarfb.f
|
||||
zlarfg.f zlarfgp.f zlarft.f
|
||||
zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
|
||||
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
|
||||
zlassq.f zlasyf.f zlasyf_rook.f zlasyf_rk.f zlasyf_aa.f
|
||||
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f
|
||||
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
|
||||
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
|
||||
zposv.f zposvx.f zpotrf2.f zpotri.f zpotrs.f zpstrf.f zpstf2.f
|
||||
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
|
||||
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
|
||||
zrot.f zspcon.f zsprfs.f zspsv.f
|
||||
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
|
||||
zstegr.f zstein.f zsteqr.f zsycon.f
|
||||
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f
|
||||
zsytri2.f zsytri2x.f zsyswapr.f
|
||||
zsytrs.f zsytrs2.f
|
||||
zsyconv.f zsyconvf.f zsyconvf_rook.f
|
||||
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f zsytrs_aa.f zsytrs_aa_2stage.f
|
||||
zsytri_rook.f zsycon_rook.f zsysv_rook.f
|
||||
zsytf2_rk.f zsytrf_rk.f zsytrf_aa.f zsytrf_aa_2stage.f zsytrs_3.f
|
||||
zsytri_3.f zsytri_3x.f zsycon_3.f zsysv_rk.f zsysv_aa.f zsysv_aa_2stage.f
|
||||
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
|
||||
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
|
||||
ztprfs.f ztptri.f
|
||||
ztptrs.f ztrcon.f ztrevc.f ztrevc3.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
|
||||
ztrsyl.f ztrtrs.f ztzrzf.f zung2l.f
|
||||
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
|
||||
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f zunm22.f
|
||||
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
|
||||
zunmtr.f zupgtr.f
|
||||
zupmtr.f izmax1.f dzsum1.f zstemr.f
|
||||
zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
|
||||
zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
|
||||
ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
|
||||
zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
|
||||
zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
|
||||
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
|
||||
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
|
||||
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f
|
||||
ztplqt.f ztplqt2.f ztpmlqt.f
|
||||
zgelqt.f zgelqt3.f zgemlqt.f
|
||||
zgetsls.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f
|
||||
zgelq.f zlaswlq.f zlamswlq.f zgemlq.f
|
||||
zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f
|
||||
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
|
||||
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f)
|
||||
|
||||
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
|
||||
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f
|
||||
zla_syrfsx_extended.f zla_syamv.f zla_syrcond_c.f zla_syrcond_x.f
|
||||
zla_syrpvgrw.f zposvxx.f zporfsx.f zla_porfsx_extended.f
|
||||
zla_porcond_c.f zla_porcond_x.f zla_porpvgrw.f zgbsvxx.f zgbrfsx.f
|
||||
zla_gbrfsx_extended.f zla_gbamv.f zla_gbrcond_c.f zla_gbrcond_x.f
|
||||
zla_gbrpvgrw.f zhesvxx.f zherfsx.f zla_herfsx_extended.f
|
||||
zla_heamv.f zla_hercond_c.f zla_hercond_x.f zla_herpvgrw.f
|
||||
zla_lin_berr.f zlarscl2.f zlascl2.f zla_wwaddw.f)
|
||||
|
||||
|
||||
if(USE_XBLAS)
|
||||
set(ALLXOBJ ${SXLASRC} ${DXLASRC} ${CXLASRC} ${ZXLASRC})
|
||||
endif()
|
||||
|
||||
list(APPEND SLASRC DEPRECATED/sgegs.f DEPRECATED/sgegv.f
|
||||
DEPRECATED/sgeqpf.f DEPRECATED/sgelsx.f DEPRECATED/sggsvd.f
|
||||
DEPRECATED/sggsvp.f DEPRECATED/slahrd.f DEPRECATED/slatzm.f DEPRECATED/stzrqf.f)
|
||||
list(APPEND DLASRC DEPRECATED/dgegs.f DEPRECATED/dgegv.f
|
||||
DEPRECATED/dgeqpf.f DEPRECATED/dgelsx.f DEPRECATED/dggsvd.f
|
||||
DEPRECATED/dggsvp.f DEPRECATED/dlahrd.f DEPRECATED/dlatzm.f DEPRECATED/dtzrqf.f)
|
||||
list(APPEND CLASRC DEPRECATED/cgegs.f DEPRECATED/cgegv.f
|
||||
DEPRECATED/cgeqpf.f DEPRECATED/cgelsx.f DEPRECATED/cggsvd.f
|
||||
DEPRECATED/cggsvp.f DEPRECATED/clahrd.f DEPRECATED/clatzm.f DEPRECATED/ctzrqf.f)
|
||||
list(APPEND ZLASRC DEPRECATED/zgegs.f DEPRECATED/zgegv.f
|
||||
DEPRECATED/zgeqpf.f DEPRECATED/zgelsx.f DEPRECATED/zggsvd.f
|
||||
DEPRECATED/zggsvp.f DEPRECATED/zlahrd.f DEPRECATED/zlatzm.f DEPRECATED/ztzrqf.f)
|
||||
message(STATUS "Building deprecated routines")
|
||||
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
|
||||
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
|
||||
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
|
||||
sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
|
||||
sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
|
||||
sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
|
||||
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f
|
||||
sgetc2.f sgetri.f
|
||||
sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f
|
||||
sggglm.f sgghrd.f sgglse.f sggqrf.f
|
||||
sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f
|
||||
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
|
||||
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
|
||||
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
|
||||
slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
|
||||
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
|
||||
slansy.f slantb.f slantp.f slantr.f slanv2.f
|
||||
slapll.f slapmt.f
|
||||
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
|
||||
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
|
||||
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
|
||||
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
|
||||
slarrv.f slartv.f
|
||||
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
|
||||
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f
|
||||
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
|
||||
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
|
||||
sorgrq.f sorgtr.f sorm2l.f sorm2r.f
|
||||
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
|
||||
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
|
||||
spbstf.f spbsv.f spbsvx.f
|
||||
spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
|
||||
sposvx.f spstrf.f spstf2.f
|
||||
sppcon.f sppequ.f
|
||||
spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
|
||||
spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
|
||||
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
|
||||
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
|
||||
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
|
||||
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
|
||||
sstevx.f
|
||||
ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
|
||||
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
|
||||
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
|
||||
ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f
|
||||
ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
|
||||
ssytri_rook.f ssycon_rook.f ssysv_rook.f
|
||||
stbcon.f
|
||||
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
|
||||
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
|
||||
stptrs.f
|
||||
strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
|
||||
strtrs.f stzrqf.f stzrzf.f sstemr.f
|
||||
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
|
||||
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
|
||||
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
|
||||
sgeequb.f ssyequb.f spoequb.f sgbequb.f
|
||||
sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
|
||||
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
|
||||
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
|
||||
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
|
||||
)
|
||||
|
||||
set(DSLASRC spotrs.f)
|
||||
|
||||
set(CLASRC
|
||||
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
|
||||
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
|
||||
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
|
||||
cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
|
||||
cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
|
||||
cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
|
||||
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
|
||||
cgesvx.f cgetc2.f cgetri.f
|
||||
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
|
||||
cgghrd.f cgglse.f cggqrf.f cggrqf.f
|
||||
cggsvd.f cggsvp.f
|
||||
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
|
||||
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
|
||||
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
|
||||
chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
|
||||
chetf2.f chetrd.f
|
||||
chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
|
||||
chetrs.f chetrs2.f
|
||||
chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f
|
||||
chgeqz.f chpcon.f chpev.f chpevd.f
|
||||
chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
|
||||
chpsvx.f
|
||||
chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
|
||||
clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
|
||||
claed0.f claed7.f claed8.f
|
||||
claein.f claesy.f claev2.f clags2.f clagtm.f
|
||||
clahef.f clahef_rook.f clahqr.f
|
||||
clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
|
||||
clanhb.f clanhe.f
|
||||
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
|
||||
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
|
||||
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
|
||||
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
|
||||
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
|
||||
clarf.f clarfb.f clarfg.f clarft.f clarfgp.f
|
||||
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
|
||||
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
|
||||
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
|
||||
clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
|
||||
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
|
||||
cposv.f cposvx.f cpstrf.f cpstf2.f
|
||||
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
|
||||
cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
|
||||
crot.f cspcon.f csprfs.f cspsv.f
|
||||
cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
|
||||
cstegr.f cstein.f csteqr.f
|
||||
csycon.f
|
||||
csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f
|
||||
csyswapr.f csytrs.f csytrs2.f csyconv.f
|
||||
csytf2_rook.f csytrf_rook.f csytrs_rook.f
|
||||
csytri_rook.f csycon_rook.f csysv_rook.f
|
||||
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
|
||||
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
|
||||
ctprfs.f ctptri.f
|
||||
ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
|
||||
ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f
|
||||
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
|
||||
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
|
||||
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
|
||||
cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
|
||||
chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
|
||||
ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
|
||||
cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
|
||||
cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
|
||||
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
|
||||
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
|
||||
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
|
||||
)
|
||||
|
||||
set(ZCLASRC cpotrs.f)
|
||||
|
||||
set(SCATGEN slatm1.f slaran.f slarnd.f)
|
||||
set(DLASRC
|
||||
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
|
||||
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
|
||||
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
|
||||
dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
|
||||
dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
|
||||
dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
|
||||
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f
|
||||
dgetc2.f dgetri.f
|
||||
dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f
|
||||
dggglm.f dgghrd.f dgglse.f dggqrf.f
|
||||
dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
|
||||
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
|
||||
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
|
||||
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
|
||||
dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
|
||||
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
|
||||
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
|
||||
dlapll.f dlapmt.f
|
||||
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
|
||||
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
|
||||
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
|
||||
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
|
||||
dlargv.f dlarrv.f dlartv.f
|
||||
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
|
||||
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f
|
||||
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
|
||||
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
|
||||
dorgrq.f dorgtr.f dorm2l.f dorm2r.f
|
||||
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
|
||||
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
|
||||
dpbstf.f dpbsv.f dpbsvx.f
|
||||
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
|
||||
dposvx.f dpotrs.f dpstrf.f dpstf2.f
|
||||
dppcon.f dppequ.f
|
||||
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
|
||||
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
|
||||
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
|
||||
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
|
||||
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
|
||||
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
|
||||
dstevx.f
|
||||
dsycon.f dsyev.f dsyevd.f dsyevr.f
|
||||
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
|
||||
dsysv.f dsysvx.f
|
||||
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f
|
||||
dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f
|
||||
dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
|
||||
dsytri_rook.f dsycon_rook.f dsysv_rook.f
|
||||
dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
|
||||
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
|
||||
dtptrs.f
|
||||
dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
|
||||
dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f
|
||||
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
|
||||
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
|
||||
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
|
||||
dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f
|
||||
dgeequb.f dsyequb.f dpoequb.f dgbequb.f
|
||||
dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
|
||||
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
|
||||
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
|
||||
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
|
||||
)
|
||||
|
||||
set(SMATGEN slatms.f slatme.f slatmr.f slatmt.f
|
||||
slagge.f slagsy.f slakf2.f slarge.f slaror.f slarot.f slatm2.f
|
||||
slatm3.f slatm5.f slatm6.f slatm7.f slahilb.f)
|
||||
set(ZLASRC
|
||||
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
|
||||
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
|
||||
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
|
||||
zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f
|
||||
zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
|
||||
zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
|
||||
zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f
|
||||
zgetri.f
|
||||
zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f
|
||||
zgghrd.f zgglse.f zggqrf.f zggrqf.f
|
||||
zggsvd.f zggsvp.f
|
||||
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
|
||||
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
|
||||
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
|
||||
zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f
|
||||
zhetf2.f zhetrd.f
|
||||
zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
|
||||
zhetrs.f zhetrs2.f
|
||||
zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f
|
||||
zhgeqz.f zhpcon.f zhpev.f zhpevd.f
|
||||
zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
|
||||
zhpsvx.f
|
||||
zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
|
||||
zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
|
||||
zlaed0.f zlaed7.f zlaed8.f
|
||||
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
|
||||
zlahef.f zlahef_rook.f zlahqr.f
|
||||
zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
|
||||
zlangt.f zlanhb.f
|
||||
zlanhe.f
|
||||
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
|
||||
zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
|
||||
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
|
||||
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
|
||||
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
|
||||
zlarcm.f zlarf.f zlarfb.f
|
||||
zlarfg.f zlarft.f zlarfgp.f
|
||||
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
|
||||
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
|
||||
zlassq.f zlasyf.f zlasyf_rook.f
|
||||
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f
|
||||
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
|
||||
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
|
||||
zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f
|
||||
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
|
||||
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
|
||||
zrot.f zspcon.f zsprfs.f zspsv.f
|
||||
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
|
||||
zstegr.f zstein.f zsteqr.f
|
||||
zsycon.f
|
||||
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
|
||||
zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f
|
||||
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
|
||||
zsytri_rook.f zsycon_rook.f zsysv_rook.f
|
||||
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
|
||||
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
|
||||
ztprfs.f ztptri.f
|
||||
ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
|
||||
ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f
|
||||
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
|
||||
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f
|
||||
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
|
||||
zunmtr.f zupgtr.f
|
||||
zupmtr.f izmax1.f dzsum1.f zstemr.f
|
||||
zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
|
||||
zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
|
||||
ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
|
||||
zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
|
||||
zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
|
||||
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
|
||||
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
|
||||
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
|
||||
)
|
||||
|
||||
set(CMATGEN clatms.f clatme.f clatmr.f clatmt.f
|
||||
clagge.f claghe.f clagsy.f clakf2.f clarge.f claror.f clarot.f
|
||||
clatm1.f clarnd.f clatm2.f clatm3.f clatm5.f clatm6.f clahilb.f slatm7.f)
|
||||
set(LA_REL_SRC ${ALLAUX})
|
||||
if (BUILD_SINGLE)
|
||||
list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX})
|
||||
endif ()
|
||||
|
||||
set(DZATGEN dlatm1.f dlaran.f dlarnd.f)
|
||||
if (BUILD_DOUBLE)
|
||||
list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX})
|
||||
endif ()
|
||||
|
||||
set(DMATGEN dlatms.f dlatme.f dlatmr.f dlatmt.f
|
||||
dlagge.f dlagsy.f dlakf2.f dlarge.f dlaror.f dlarot.f dlatm2.f
|
||||
dlatm3.f dlatm5.f dlatm6.f dlatm7.f dlahilb.f)
|
||||
if (BUILD_COMPLEX)
|
||||
list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX})
|
||||
endif ()
|
||||
|
||||
set(ZMATGEN zlatms.f zlatme.f zlatmr.f zlatmt.f
|
||||
zlagge.f zlaghe.f zlagsy.f zlakf2.f zlarge.f zlaror.f zlarot.f
|
||||
zlatm1.f zlarnd.f zlatm2.f zlatm3.f zlatm5.f zlatm6.f zlahilb.f dlatm7.f)
|
||||
|
||||
if(BUILD_SINGLE)
|
||||
set(LA_REL_SRC ${SLASRC} ${DSLASRC} ${ALLAUX} ${SCLAUX})
|
||||
set(LA_GEN_SRC ${SMATGEN} ${SCATGEN})
|
||||
message(STATUS "Building Single Precision")
|
||||
endif()
|
||||
if(BUILD_DOUBLE)
|
||||
set(LA_REL_SRC ${LA_REL_SRC} ${DLASRC} ${DSLASRC} ${ALLAUX} ${DZLAUX})
|
||||
set(LA_GEN_SRC ${LA_GEN_SRC} ${DMATGEN} ${DZATGEN})
|
||||
message(STATUS "Building Double Precision")
|
||||
endif()
|
||||
if(BUILD_COMPLEX)
|
||||
set(LA_REL_SRC ${LA_REL_SRC} ${CLASRC} ${ZCLASRC} ${ALLAUX} ${SCLAUX})
|
||||
SET(LA_GEN_SRC ${LA_GEN_SRC} ${CMATGEN} ${SCATGEN})
|
||||
message(STATUS "Building Complex Precision")
|
||||
endif()
|
||||
if(BUILD_COMPLEX16)
|
||||
set(LA_REL_SRC ${LA_REL_SRC} ${ZLASRC} ${ZCLASRC} ${ALLAUX} ${DZLAUX})
|
||||
SET(LA_GEN_SRC ${LA_GEN_SRC} ${ZMATGEN} ${DZATGEN})
|
||||
message(STATUS "Building Double Complex Precision")
|
||||
endif()
|
||||
if (BUILD_COMPLEX16)
|
||||
list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX})
|
||||
endif ()
|
||||
|
||||
# add lapack-netlib folder to the sources
|
||||
set(LA_SOURCES "")
|
||||
foreach (LA_FILE ${LA_REL_SRC})
|
||||
list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}")
|
||||
endforeach ()
|
||||
foreach (LA_FILE ${LA_GEN_SRC})
|
||||
list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/TESTING/MATGEN/${LA_FILE}")
|
||||
endforeach ()
|
||||
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")
|
||||
|
||||
4467
cmake/lapacke.cmake
4467
cmake/lapacke.cmake
File diff suppressed because it is too large
Load Diff
@@ -1,11 +0,0 @@
|
||||
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
||||
libsuffix=@SUFFIX64_UNDERSCORE@
|
||||
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||
|
||||
openblas_config=USE_64BITINT=@USE_64BITINT@ NO_CBLAS=@NO_CBLAS@ NO_LAPACK=@NO_LAPACK@ NO_LAPACKE=@NO_LAPACKE@ DYNAMIC_ARCH=@DYNAMIC_ARCH@ DYNAMIC_OLDER=@DYNAMIC_OLDER@ NO_AFFINITY=@NO_AFFINITY@ USE_OPENMP=@USE_OPENMP@ @CORE@ MAX_THREADS=@NUM_THREADS@
|
||||
Name: OpenBLAS
|
||||
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
|
||||
Version: @OPENBLAS_VERSION@
|
||||
URL: https://github.com/xianyi/OpenBLAS
|
||||
Libs: -L${libdir} -lopenblas${libsuffix}
|
||||
Cflags: -I${includedir}
|
||||
@@ -3,12 +3,20 @@
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Detects the OS and sets appropriate variables.
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
set(EXTRALIB "${EXTRALIB} -lm")
|
||||
set(NO_EXPRECISION 1)
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
|
||||
set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var
|
||||
set(MD5SUM "md5 -r")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD|OpenBSD|NetBSD|DragonFly")
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
|
||||
set(MD5SUM "md5 -r")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD")
|
||||
set(MD5SUM "md5 -n")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
set(EXTRALIB "${EXTRALIB} -lm")
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
@@ -48,7 +56,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
|
||||
# Ensure the correct stack alignment on Win32
|
||||
# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
|
||||
if (X86)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
if (NOT MSVC AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2")
|
||||
endif ()
|
||||
@@ -69,8 +77,8 @@ if (CYGWIN)
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Android")
|
||||
if (USE_THREAD)
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix")
|
||||
if (SMP)
|
||||
set(EXTRALIB "${EXTRALIB} -lpthread")
|
||||
endif ()
|
||||
endif ()
|
||||
@@ -80,7 +88,7 @@ if (QUAD_PRECISION)
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
if (X86)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
|
||||
@@ -4,8 +4,7 @@
|
||||
## This is triggered by system.cmake and runs before any of the code is built.
|
||||
## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files).
|
||||
## Next it runs f_check and appends some fortran information to the files.
|
||||
## Then it runs getarch and getarch_2nd for even more environment information.
|
||||
## Finally it builds gen_config_h for use at build time to generate config.h.
|
||||
## Finally it runs getarch and getarch_2nd for even more environment information.
|
||||
|
||||
# CMake vars set by this file:
|
||||
# CORE
|
||||
@@ -37,339 +36,78 @@
|
||||
|
||||
# CPUIDEMU = ../../cpuid/table.o
|
||||
|
||||
|
||||
if (DEFINED CPUIDEMU)
|
||||
set(EXFLAGS "-DCPUIDEMU -DVENDOR=99")
|
||||
endif ()
|
||||
|
||||
if (BUILD_KERNEL)
|
||||
if (DEFINED TARGET_CORE)
|
||||
# set the C flags for just this file
|
||||
set(GETARCH2_FLAGS "-DBUILD_KERNEL")
|
||||
set(TARGET_MAKE "Makefile_kernel.conf")
|
||||
set(TARGET_CONF "config_kernel.h")
|
||||
set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR}/kernel_config/${TARGET_CORE})
|
||||
else()
|
||||
set(TARGET_MAKE "Makefile.conf")
|
||||
set(TARGET_CONF "config.h")
|
||||
set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR})
|
||||
endif ()
|
||||
|
||||
set(TARGET_CONF_TEMP "${PROJECT_BINARY_DIR}/${TARGET_CONF}.tmp")
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake")
|
||||
|
||||
# c_check
|
||||
set(FU "")
|
||||
if (APPLE OR (MSVC AND NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang"))
|
||||
set(FU "_")
|
||||
endif()
|
||||
if(MINGW AND NOT MINGW64)
|
||||
set(FU "_")
|
||||
endif()
|
||||
|
||||
set(COMPILER_ID ${CMAKE_C_COMPILER_ID})
|
||||
if (${COMPILER_ID} STREQUAL "GNU")
|
||||
set(COMPILER_ID "GCC")
|
||||
endif ()
|
||||
|
||||
string(TOUPPER ${ARCH} UC_ARCH)
|
||||
|
||||
file(WRITE ${TARGET_CONF_TEMP}
|
||||
"#define OS_${HOST_OS}\t1\n"
|
||||
"#define ARCH_${UC_ARCH}\t1\n"
|
||||
"#define C_${COMPILER_ID}\t1\n"
|
||||
"#define __${BINARY}BIT__\t1\n"
|
||||
"#define FUNDERSCORE\t${FU}\n")
|
||||
|
||||
if (${HOST_OS} STREQUAL "WINDOWSSTORE")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define OS_WINNT\t1\n")
|
||||
endif ()
|
||||
|
||||
# f_check
|
||||
if (NOT NOFORTRAN)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake")
|
||||
else ()
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define BUNDERSCORE _\n"
|
||||
"#define NEEDBUNDERSCORE 1\n")
|
||||
set(BU "_")
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake")
|
||||
endif ()
|
||||
|
||||
# Cannot run getarch on target if we are cross-compiling
|
||||
if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSSTORE"))
|
||||
# Write to config as getarch would
|
||||
if (DEFINED TARGET_CORE)
|
||||
set(TCORE ${TARGET_CORE})
|
||||
else()
|
||||
set(TCORE ${CORE})
|
||||
endif()
|
||||
# compile getarch
|
||||
set(GETARCH_SRC
|
||||
${CMAKE_SOURCE_DIR}/getarch.c
|
||||
${CPUIDEMO}
|
||||
)
|
||||
|
||||
# TODO: Set up defines that getarch sets up based on every other target
|
||||
# Perhaps this should be inside a different file as it grows larger
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define ${TCORE}\n"
|
||||
"#define CHAR_CORENAME \"${TCORE}\"\n")
|
||||
if ("${TCORE}" STREQUAL "ARMV7")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE\t65536\n"
|
||||
"#define L1_DATA_LINESIZE\t32\n"
|
||||
"#define L2_SIZE\t512488\n"
|
||||
"#define L2_LINESIZE\t32\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define L2_ASSOCIATIVE\t4\n"
|
||||
"#define HAVE_VFPV3\n"
|
||||
"#define HAVE_VFP\n")
|
||||
set(SGEMM_UNROLL_M 4)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 4)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
elseif ("${TCORE}" STREQUAL "ARMV8")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE\t32768\n"
|
||||
"#define L1_DATA_LINESIZE\t64\n"
|
||||
"#define L2_SIZE\t262144\n"
|
||||
"#define L2_LINESIZE\t64\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define L2_ASSOCIATIVE\t32\n"
|
||||
"#define ARMV8\n")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "CORTEXA57" OR "${TCORE}" STREQUAL "CORTEXA53")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_CODE_SIZE\t32768\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
"#define L1_CODE_ASSOCIATIVE\t3\n"
|
||||
"#define L1_DATA_SIZE\t32768\n"
|
||||
"#define L1_DATA_LINESIZE\t64\n"
|
||||
"#define L1_DATA_ASSOCIATIVE\t2\n"
|
||||
"#define L2_SIZE\t262144\n"
|
||||
"#define L2_LINESIZE\t64\n"
|
||||
"#define L2_ASSOCIATIVE\t16\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define HAVE_VFPV4\n"
|
||||
"#define HAVE_VFPV3\n"
|
||||
"#define HAVE_VFP\n"
|
||||
"#define HAVE_NEON\n"
|
||||
"#define ARMV8\n")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "CORTEXA72" OR "${TCORE}" STREQUAL "CORTEXA73")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_CODE_SIZE\t49152\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
"#define L1_CODE_ASSOCIATIVE\t3\n"
|
||||
"#define L1_DATA_SIZE\t32768\n"
|
||||
"#define L1_DATA_LINESIZE\t64\n"
|
||||
"#define L1_DATA_ASSOCIATIVE\t2\n"
|
||||
"#define L2_SIZE\t524288\n"
|
||||
"#define L2_LINESIZE\t64\n"
|
||||
"#define L2_ASSOCIATIVE\t16\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define HAVE_VFPV4\n"
|
||||
"#define HAVE_VFPV3\n"
|
||||
"#define HAVE_VFP\n"
|
||||
"#define HAVE_NEON\n"
|
||||
"#define ARMV8\n")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "FALKOR")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_CODE_SIZE\t65536\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
"#define L1_CODE_ASSOCIATIVE\t3\n"
|
||||
"#define L1_DATA_SIZE\t32768\n"
|
||||
"#define L1_DATA_LINESIZE\t128\n"
|
||||
"#define L1_DATA_ASSOCIATIVE\t2\n"
|
||||
"#define L2_SIZE\t524288\n"
|
||||
"#define L2_LINESIZE\t64\n"
|
||||
"#define L2_ASSOCIATIVE\t16\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define HAVE_VFPV4\n"
|
||||
"#define HAVE_VFPV3\n"
|
||||
"#define HAVE_VFP\n"
|
||||
"#define HAVE_NEON\n"
|
||||
"#define ARMV8\n")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "THUNDERX")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_CODE_SIZE\t32768\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
"#define L1_CODE_ASSOCIATIVE\t3\n"
|
||||
"#define L1_DATA_SIZE\t32768\n"
|
||||
"#define L1_DATA_LINESIZE\t128\n"
|
||||
"#define L1_DATA_ASSOCIATIVE\t2\n"
|
||||
"#define L2_SIZE\t167772164\n"
|
||||
"#define L2_LINESIZE\t128\n"
|
||||
"#define L2_ASSOCIATIVE\t16\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define HAVE_VFPV4\n"
|
||||
"#define HAVE_VFPV3\n"
|
||||
"#define HAVE_VFP\n"
|
||||
"#define HAVE_NEON\n"
|
||||
"#define ARMV8\n")
|
||||
set(SGEMM_UNROLL_M 4)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 2)
|
||||
set(DGEMM_UNROLL_N 2)
|
||||
set(CGEMM_UNROLL_M 2)
|
||||
set(CGEMM_UNROLL_N 2)
|
||||
set(ZGEMM_UNROLL_M 2)
|
||||
set(ZGEMM_UNROLL_N 2)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "THUNDERX2T99")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_CODE_SIZE\t32768\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
"#define L1_CODE_ASSOCIATIVE\t8\n"
|
||||
"#define L1_DATA_SIZE\t32768\n"
|
||||
"#define L1_DATA_LINESIZE\t64\n"
|
||||
"#define L1_DATA_ASSOCIATIVE\t8\n"
|
||||
"#define L2_SIZE\t262144\n"
|
||||
"#define L2_LINESIZE\t64\n"
|
||||
"#define L2_ASSOCIATIVE\t8\n"
|
||||
"#define L3_SIZE\t33554432\n"
|
||||
"#define L3_LINESIZE\t64\n"
|
||||
"#define L3_ASSOCIATIVE\t32\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n"
|
||||
"#define ARMV8\n")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
endif()
|
||||
|
||||
# Or should this actually be NUM_CORES?
|
||||
if (${NUM_THREADS} GREATER 0)
|
||||
file(APPEND ${TARGET_CONF_TEMP} "#define NUM_CORES\t${NUM_THREADS}\n")
|
||||
endif()
|
||||
|
||||
# GetArch_2nd
|
||||
foreach(float_char S;D;Q;C;Z;X)
|
||||
if (NOT DEFINED ${float_char}GEMM_UNROLL_M)
|
||||
set(${float_char}GEMM_UNROLL_M 2)
|
||||
endif()
|
||||
if (NOT DEFINED ${float_char}GEMM_UNROLL_N)
|
||||
set(${float_char}GEMM_UNROLL_N 2)
|
||||
endif()
|
||||
endforeach()
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define GEMM_MULTITHREAD_THRESHOLD\t${GEMM_MULTITHREAD_THRESHOLD}\n")
|
||||
# Move to where gen_config_h would place it
|
||||
file(MAKE_DIRECTORY ${TARGET_CONF_DIR})
|
||||
file(RENAME ${TARGET_CONF_TEMP} "${TARGET_CONF_DIR}/${TARGET_CONF}")
|
||||
|
||||
else(NOT CMAKE_CROSSCOMPILING)
|
||||
# compile getarch
|
||||
set(GETARCH_SRC
|
||||
${PROJECT_SOURCE_DIR}/getarch.c
|
||||
${CPUIDEMU}
|
||||
)
|
||||
|
||||
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC")
|
||||
#Use generic for MSVC now
|
||||
message("MSVC")
|
||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
|
||||
else()
|
||||
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
|
||||
endif ()
|
||||
|
||||
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
# disable WindowsStore strict CRT checks
|
||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
|
||||
endif ()
|
||||
|
||||
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
|
||||
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH_DIR})
|
||||
configure_file(${TARGET_CONF_TEMP} ${GETARCH_DIR}/${TARGET_CONF} COPYONLY)
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GETARCH_RESULT ${GETARCH_DIR}
|
||||
SOURCES ${GETARCH_SRC}
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${GETARCH_DIR} -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
|
||||
OUTPUT_VARIABLE GETARCH_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
|
||||
)
|
||||
|
||||
if (NOT ${GETARCH_RESULT})
|
||||
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
|
||||
endif ()
|
||||
endif ()
|
||||
message(STATUS "Running getarch")
|
||||
|
||||
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
||||
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
|
||||
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
|
||||
|
||||
message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
|
||||
|
||||
# append config data from getarch to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF_TEMP} ${GETARCH_CONF_OUT})
|
||||
ParseGetArchVars(${GETARCH_MAKE_OUT})
|
||||
|
||||
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
|
||||
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH2_DIR})
|
||||
configure_file(${TARGET_CONF_TEMP} ${GETARCH2_DIR}/${TARGET_CONF} COPYONLY)
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
|
||||
SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${GETARCH2_DIR} -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
|
||||
OUTPUT_VARIABLE GETARCH2_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
|
||||
)
|
||||
|
||||
if (NOT ${GETARCH2_RESULT})
|
||||
MESSAGE(FATAL_ERROR "Compiling getarch_2nd failed ${GETARCH2_LOG}")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
||||
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
|
||||
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)
|
||||
|
||||
# append config data from getarch_2nd to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF_TEMP} ${GETARCH2_CONF_OUT})
|
||||
|
||||
configure_file(${TARGET_CONF_TEMP} ${TARGET_CONF_DIR}/${TARGET_CONF} COPYONLY)
|
||||
|
||||
ParseGetArchVars(${GETARCH2_MAKE_OUT})
|
||||
if (NOT MSVC)
|
||||
list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
|
||||
endif ()
|
||||
|
||||
if (MSVC)
|
||||
#Use generic for MSVC now
|
||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
|
||||
endif()
|
||||
|
||||
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
|
||||
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH_DIR})
|
||||
try_compile(GETARCH_RESULT ${GETARCH_DIR}
|
||||
SOURCES ${GETARCH_SRC}
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GETARCH_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
|
||||
)
|
||||
|
||||
message(STATUS "Running getarch")
|
||||
|
||||
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
|
||||
|
||||
message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
|
||||
|
||||
# append config data from getarch to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT})
|
||||
ParseGetArchVars(${GETARCH_MAKE_OUT})
|
||||
|
||||
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
|
||||
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH2_DIR})
|
||||
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
|
||||
SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GETARCH2_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
|
||||
)
|
||||
|
||||
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)
|
||||
|
||||
# append config data from getarch_2nd to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT})
|
||||
ParseGetArchVars(${GETARCH2_MAKE_OUT})
|
||||
|
||||
|
||||
@@ -2,27 +2,13 @@
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from OpenBLAS/Makefile.system
|
||||
##
|
||||
set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib")
|
||||
|
||||
# System detection, via CMake.
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/system_check.cmake")
|
||||
set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib")
|
||||
|
||||
if(CMAKE_CROSSCOMPILING AND NOT DEFINED TARGET)
|
||||
# Detect target without running getarch
|
||||
if (ARM64)
|
||||
set(TARGET "ARMV8")
|
||||
elseif(ARM)
|
||||
set(TARGET "ARMV7") # TODO: Ask compiler which arch this is
|
||||
else()
|
||||
message(FATAL_ERROR "When cross compiling, a TARGET is required.")
|
||||
endif()
|
||||
endif()
|
||||
# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
|
||||
# http://stackoverflow.com/questions/714100/os-detecting-makefile
|
||||
|
||||
# Other files expect CORE, which is actually TARGET and will become TARGET_CORE for kernel build. Confused yet?
|
||||
# It seems we are meant to use TARGET as input and CORE internally as kernel.
|
||||
if(NOT DEFINED CORE AND DEFINED TARGET)
|
||||
set(CORE ${TARGET})
|
||||
endif()
|
||||
# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa
|
||||
|
||||
# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
|
||||
if (DEFINED TARGET_CORE)
|
||||
@@ -33,50 +19,19 @@ endif ()
|
||||
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
|
||||
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
||||
set(NO_AVX 1)
|
||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX")
|
||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE")
|
||||
set(TARGET "NEHALEM")
|
||||
endif ()
|
||||
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
|
||||
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER")
|
||||
set(TARGET "BARCELONA")
|
||||
endif ()
|
||||
if (${TARGET} STREQUAL "ARMV8" OR ${TARGET} STREQUAL "CORTEXA57" OR ${TARGET} STREQUAL "CORTEXA53")
|
||||
set(TARGET "ARMV7")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (DEFINED TARGET)
|
||||
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
endif()
|
||||
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||
endif()
|
||||
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (DEFINED TARGET)
|
||||
message(STATUS "Targeting the ${TARGET} architecture.")
|
||||
message(STATUS "Targetting the ${TARGET} architecture.")
|
||||
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
|
||||
endif ()
|
||||
|
||||
# On x86_64 build getarch with march=native. This is required to detect AVX512 support in getarch.
|
||||
if (X86_64)
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -march=native")
|
||||
endif ()
|
||||
|
||||
# On x86 no AVX support is available
|
||||
if (X86 OR X86_64)
|
||||
if ((DEFINED BINARY AND BINARY EQUAL 32) OR ("$CMAKE_SIZEOF_VOID_P}" EQUAL "4"))
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX -DNO_AVX2 -DNO_AVX512")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (INTERFACE64)
|
||||
message(STATUS "Using 64-bit integers.")
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DUSE64BITINT")
|
||||
@@ -98,16 +53,21 @@ if (NO_AVX2)
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2")
|
||||
endif ()
|
||||
|
||||
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} ${CMAKE_C_FLAGS_DEBUG}")
|
||||
if (CMAKE_BUILD_TYPE STREQUAL Debug)
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -g")
|
||||
endif ()
|
||||
|
||||
# TODO: let CMake handle this? -hpa
|
||||
#if (${QUIET_MAKE})
|
||||
# set(MAKE "${MAKE} -s")
|
||||
#endif()
|
||||
|
||||
if (NOT DEFINED NO_PARALLEL_MAKE)
|
||||
set(NO_PARALLEL_MAKE 0)
|
||||
endif ()
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}")
|
||||
|
||||
if (CMAKE_C_COMPILER STREQUAL loongcc)
|
||||
if (CMAKE_CXX_COMPILER STREQUAL loongcc)
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -static")
|
||||
endif ()
|
||||
|
||||
@@ -118,65 +78,63 @@ else ()
|
||||
set(ONLY_CBLAS 0)
|
||||
endif ()
|
||||
|
||||
# N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
|
||||
if (NOT CMAKE_CROSSCOMPILING)
|
||||
if (NOT DEFINED NUM_CORES)
|
||||
include(ProcessorCount)
|
||||
ProcessorCount(NUM_CORES)
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NUM_PARALLEL)
|
||||
set(NUM_PARALLEL 1)
|
||||
endif()
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake")
|
||||
|
||||
if (NOT DEFINED NUM_THREADS)
|
||||
if (DEFINED NUM_CORES AND NOT NUM_CORES EQUAL 0)
|
||||
# HT?
|
||||
set(NUM_THREADS ${NUM_CORES})
|
||||
else ()
|
||||
set(NUM_THREADS 0)
|
||||
endif ()
|
||||
endif()
|
||||
set(NUM_THREADS ${NUM_CORES})
|
||||
endif ()
|
||||
|
||||
if (${NUM_THREADS} LESS 2)
|
||||
if (${NUM_THREADS} EQUAL 1)
|
||||
set(USE_THREAD 0)
|
||||
elseif(NOT DEFINED USE_THREAD)
|
||||
set(USE_THREAD 1)
|
||||
endif ()
|
||||
|
||||
if (USE_THREAD)
|
||||
message(STATUS "Multi-threading enabled with ${NUM_THREADS} threads.")
|
||||
else()
|
||||
if (${USE_LOCKING})
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_LOCKING")
|
||||
if (DEFINED USE_THREAD)
|
||||
if (NOT ${USE_THREAD})
|
||||
unset(SMP)
|
||||
else ()
|
||||
set(SMP 1)
|
||||
endif ()
|
||||
else ()
|
||||
# N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
|
||||
if (${NUM_THREADS} EQUAL 1)
|
||||
unset(SMP)
|
||||
else ()
|
||||
set(SMP 1)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
|
||||
if (DEFINED BINARY)
|
||||
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
||||
if (${SMP})
|
||||
message(STATUS "SMP enabled.")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED NEED_PIC)
|
||||
set(NEED_PIC 1)
|
||||
endif ()
|
||||
|
||||
# TODO: I think CMake should be handling all this stuff -hpa
|
||||
unset(ARFLAGS)
|
||||
set(CPP "${COMPILER} -E")
|
||||
set(AR "${CROSS_SUFFIX}ar")
|
||||
set(AS "${CROSS_SUFFIX}as")
|
||||
set(LD "${CROSS_SUFFIX}ld")
|
||||
set(RANLIB "${CROSS_SUFFIX}ranlib")
|
||||
set(NM "${CROSS_SUFFIX}nm")
|
||||
set(DLLWRAP "${CROSS_SUFFIX}dllwrap")
|
||||
set(OBJCOPY "${CROSS_SUFFIX}objcopy")
|
||||
set(OBJCONV "${CROSS_SUFFIX}objconv")
|
||||
|
||||
# OS dependent settings
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/os.cmake")
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/os.cmake")
|
||||
|
||||
# Architecture dependent settings
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/arch.cmake")
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake")
|
||||
|
||||
# C Compiler dependent settings
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/cc.cmake")
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake")
|
||||
|
||||
if (NOT NOFORTRAN)
|
||||
# Fortran Compiler dependent settings
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/fc.cmake")
|
||||
else ()
|
||||
set(NO_LAPACK 1)
|
||||
set(NO_LAPACKE 1)
|
||||
include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake")
|
||||
endif ()
|
||||
|
||||
if (BINARY64)
|
||||
@@ -192,32 +150,15 @@ if (NEED_PIC)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC")
|
||||
endif ()
|
||||
|
||||
if (NOT NOFORTRAN)
|
||||
if (${F_COMPILER} STREQUAL "SUN")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
|
||||
endif ()
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
if (X86 OR X86_64 OR ARM64 OR PPC)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
|
||||
if (DYNAMIC_OLDER)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_OLDER")
|
||||
endif ()
|
||||
if (${F_COMPILER} STREQUAL "SUN")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
|
||||
else ()
|
||||
unset (DYNAMIC_ARCH)
|
||||
message (STATUS "DYNAMIC_ARCH is not supported on the target architecture, removing")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (DYNAMIC_LIST)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_LIST")
|
||||
foreach(DCORE ${DYNAMIC_LIST})
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYN_${DCORE}")
|
||||
endforeach ()
|
||||
if (DYNAMIC_ARCH)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
|
||||
endif ()
|
||||
|
||||
if (NO_LAPACK)
|
||||
@@ -234,7 +175,7 @@ if (NO_AVX)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
|
||||
endif ()
|
||||
|
||||
if (X86)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
|
||||
endif ()
|
||||
|
||||
@@ -242,20 +183,25 @@ if (NO_AVX2)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
|
||||
endif ()
|
||||
|
||||
if (USE_THREAD)
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
# NO_AFFINITY = 1
|
||||
if (SMP)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER")
|
||||
|
||||
if (MIPS64)
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
if (NOT ${CORE} STREQUAL "LOONGSON3B")
|
||||
set(USE_SIMPLE_THREADED_LEVEL3 1)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (USE_OPENMP)
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
# NO_AFFINITY = 1
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_OPENMP")
|
||||
endif ()
|
||||
|
||||
if (BIGNUMA)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DBIGNUMA")
|
||||
endif ()
|
||||
|
||||
endif ()
|
||||
|
||||
if (NO_WARMUP)
|
||||
@@ -266,10 +212,6 @@ if (CONSISTENT_FPCSR)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DCONSISTENT_FPCSR")
|
||||
endif ()
|
||||
|
||||
if (USE_TLS)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_TLS")
|
||||
endif ()
|
||||
|
||||
# Only for development
|
||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST")
|
||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST")
|
||||
@@ -287,8 +229,6 @@ endif ()
|
||||
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")
|
||||
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_PARALLEL_NUMBER=${NUM_PARALLEL}")
|
||||
|
||||
if (USE_SIMPLE_THREADED_LEVEL3)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
|
||||
endif ()
|
||||
@@ -307,10 +247,10 @@ if (NOT DEFINED SYMBOLSUFFIX)
|
||||
set(SYMBOLSUFFIX "")
|
||||
endif ()
|
||||
|
||||
set(KERNELDIR "${PROJECT_SOURCE_DIR}/kernel/${ARCH}")
|
||||
set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}")
|
||||
|
||||
# TODO: need to convert these Makefiles
|
||||
# include ${PROJECT_SOURCE_DIR}/cmake/${ARCH}.cmake
|
||||
# TODO: nead to convert these Makefiles
|
||||
# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake
|
||||
|
||||
if (${CORE} STREQUAL "PPC440")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC")
|
||||
@@ -324,7 +264,7 @@ if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
set(NO_AFFINITY 1)
|
||||
endif ()
|
||||
|
||||
if (NOT X86_64 AND NOT X86 AND NOT ${CORE} STREQUAL "LOONGSON3B")
|
||||
if (NOT ${ARCH} STREQUAL "x86_64" AND NOT ${ARCH} STREQUAL "x86" AND NOT ${CORE} STREQUAL "LOONGSON3B")
|
||||
set(NO_AFFINITY 1)
|
||||
endif ()
|
||||
|
||||
@@ -356,21 +296,50 @@ if (MIXED_MEMORY_ALLOCATION)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
|
||||
endif ()
|
||||
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DVERSION=\"\\\"${OpenBLAS_VERSION}\\\"\"")
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "SunOS")
|
||||
set(TAR gtar)
|
||||
set(PATCH gpatch)
|
||||
set(GREP ggrep)
|
||||
else ()
|
||||
set(TAR tar)
|
||||
set(PATCH patch)
|
||||
set(GREP grep)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED MD5SUM)
|
||||
set(MD5SUM md5sum)
|
||||
endif ()
|
||||
|
||||
set(AWK awk)
|
||||
|
||||
set(REVISION "-r${OpenBLAS_VERSION}")
|
||||
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CCOMMON_OPT}")
|
||||
if (DEBUG)
|
||||
set(COMMON_OPT "${COMMON_OPT} -g")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED COMMON_OPT)
|
||||
set(COMMON_OPT "-O2")
|
||||
endif ()
|
||||
|
||||
#For x86 32-bit
|
||||
if (DEFINED BINARY AND BINARY EQUAL 32)
|
||||
if (NOT MSVC)
|
||||
set(COMMON_OPT "${COMMON_OPT} -m32")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
|
||||
if(NOT MSVC)
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${CCOMMON_OPT}")
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
|
||||
endif()
|
||||
# TODO: not sure what PFLAGS is -hpa
|
||||
set(PFLAGS "${PFLAGS} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")
|
||||
set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")
|
||||
|
||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${FCOMMON_OPT}")
|
||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}")
|
||||
# TODO: not sure what FPFLAGS is -hpa
|
||||
set(FPFLAGS "${FPFLAGS} ${FCOMMON_OPT} ${COMMON_PROF}")
|
||||
set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}")
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}")
|
||||
@@ -378,7 +347,7 @@ set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}")
|
||||
|
||||
#Disable -fopenmp for LAPACK Fortran codes on Windows.
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel")
|
||||
set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parralel")
|
||||
foreach (FILTER_FLAG ${FILTER_FLAGS})
|
||||
string(REPLACE ${FILTER_FLAG} "" LAPACK_FFLAGS ${LAPACK_FFLAGS})
|
||||
string(REPLACE ${FILTER_FLAG} "" LAPACK_FPFLAGS ${LAPACK_FPFLAGS})
|
||||
@@ -416,7 +385,7 @@ if (NOT DEFINED LIBSUFFIX)
|
||||
endif ()
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
if (USE_THREAD)
|
||||
if (DEFINED SMP)
|
||||
set(LIBNAME "${LIBPREFIX}p${REVISION}.${LIBSUFFIX}")
|
||||
set(LIBNAME_P "${LIBPREFIX}p${REVISION}_p.${LIBSUFFIX}")
|
||||
else ()
|
||||
@@ -424,7 +393,7 @@ if (DYNAMIC_ARCH)
|
||||
set(LIBNAME_P "${LIBPREFIX}${REVISION}_p.${LIBSUFFIX}")
|
||||
endif ()
|
||||
else ()
|
||||
if (USE_THREAD)
|
||||
if (DEFINED SMP)
|
||||
set(LIBNAME "${LIBPREFIX}_${LIBCORE}p${REVISION}.${LIBSUFFIX}")
|
||||
set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}p${REVISION}_p.${LIBSUFFIX}")
|
||||
else ()
|
||||
@@ -441,8 +410,8 @@ set(LIBDEFNAME "${LIBNAME}.${LIBSUFFIX}.def")
|
||||
set(LIBEXPNAME "${LIBNAME}.${LIBSUFFIX}.exp")
|
||||
set(LIBZIPNAME "${LIBNAME}.${LIBSUFFIX}.zip")
|
||||
|
||||
set(LIBS "${PROJECT_SOURCE_DIR}/${LIBNAME}")
|
||||
set(LIBS_P "${PROJECT_SOURCE_DIR}/${LIBNAME_P}")
|
||||
set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}")
|
||||
set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}")
|
||||
|
||||
|
||||
set(LIB_COMPONENTS BLAS)
|
||||
@@ -455,9 +424,6 @@ if (NOT NO_LAPACK)
|
||||
if (NOT NO_LAPACKE)
|
||||
set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE")
|
||||
endif ()
|
||||
if (BUILD_RELAPACK)
|
||||
set(LIB_COMPONENTS "${LIB_COMPONENTS} ReLAPACK")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (ONLY_CBLAS)
|
||||
@@ -469,7 +435,7 @@ endif ()
|
||||
set(USE_GEMM3M 0)
|
||||
|
||||
if (DEFINED ARCH)
|
||||
if (X86 OR X86_64 OR ${ARCH} STREQUAL "ia64" OR MIPS64)
|
||||
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
|
||||
set(USE_GEMM3M 1)
|
||||
endif ()
|
||||
|
||||
@@ -552,3 +518,35 @@ endif ()
|
||||
# export CUFLAGS
|
||||
# export CULIB
|
||||
#endif
|
||||
|
||||
#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f
|
||||
#
|
||||
#.f.$(SUFFIX):
|
||||
# $(FC) $(FFLAGS) -c $< -o $(@F)
|
||||
#
|
||||
#.f.$(PSUFFIX):
|
||||
# $(FC) $(FPFLAGS) -pg -c $< -o $(@F)
|
||||
|
||||
# these are not cross-platform
|
||||
#ifdef BINARY64
|
||||
#PATHSCALEPATH = /opt/pathscale/lib/3.1
|
||||
#PGIPATH = /opt/pgi/linux86-64/7.1-5/lib
|
||||
#else
|
||||
#PATHSCALEPATH = /opt/pathscale/lib/3.1/32
|
||||
#PGIPATH = /opt/pgi/linux86/7.1-5/lib
|
||||
#endif
|
||||
|
||||
#ACMLPATH = /opt/acml/4.3.0
|
||||
#ifneq ($(OSNAME), Darwin)
|
||||
#MKLPATH = /opt/intel/mkl/10.2.2.025/lib
|
||||
#else
|
||||
#MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib
|
||||
#endif
|
||||
#ATLASPATH = /opt/atlas/3.9.17/opteron
|
||||
#FLAMEPATH = $(HOME)/flame/lib
|
||||
#ifneq ($(OSNAME), SunOS)
|
||||
#SUNPATH = /opt/sunstudio12.1
|
||||
#else
|
||||
#SUNPATH = /opt/SUNWspro
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,95 +0,0 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from the OpenBLAS/c_check perl script.
|
||||
## This is triggered by prebuild.cmake and runs before any of the code is built.
|
||||
## Creates config.h and Makefile.conf.
|
||||
|
||||
# Convert CMake vars into the format that OpenBLAS expects
|
||||
string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS)
|
||||
if (${HOST_OS} STREQUAL "WINDOWS")
|
||||
set(HOST_OS WINNT)
|
||||
endif ()
|
||||
|
||||
if (${HOST_OS} STREQUAL "LINUX")
|
||||
# check if we're building natively on Android (TERMUX)
|
||||
EXECUTE_PROCESS( COMMAND uname -o COMMAND tr -d '\n' OUTPUT_VARIABLE OPERATING_SYSTEM)
|
||||
if(${OPERATING_SYSTEM} MATCHES "Android")
|
||||
set(HOST_OS ANDROID)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCC AND WIN32)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpmachine
|
||||
OUTPUT_VARIABLE OPENBLAS_GCC_TARGET_MACHINE
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
if(OPENBLAS_GCC_TARGET_MACHINE MATCHES "amd64|x86_64|AMD64")
|
||||
set(MINGW64 1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Pretty thorough determination of arch. Add more if needed
|
||||
if(CMAKE_CL_64 OR MINGW64)
|
||||
set(X86_64 1)
|
||||
elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
|
||||
set(X86 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc.*|power.*|Power.*")
|
||||
set(PPC 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "mips64.*")
|
||||
set(MIPS64 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
|
||||
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
|
||||
set(X86_64 1)
|
||||
else()
|
||||
set(X86 1)
|
||||
endif()
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
|
||||
set(X86 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
|
||||
set(ARM 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
|
||||
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
|
||||
set(ARM64 1)
|
||||
else()
|
||||
set(ARM 1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (X86_64)
|
||||
set(ARCH "x86_64")
|
||||
elseif(X86)
|
||||
set(ARCH "x86")
|
||||
elseif(PPC)
|
||||
set(ARCH "power")
|
||||
elseif(ARM)
|
||||
set(ARCH "arm")
|
||||
elseif(ARM64)
|
||||
set(ARCH "arm64")
|
||||
else()
|
||||
set(ARCH ${CMAKE_SYSTEM_PROCESSOR} CACHE STRING "Target Architecture")
|
||||
endif ()
|
||||
|
||||
if (NOT BINARY)
|
||||
if (X86_64 OR ARM64 OR PPC OR MIPS64)
|
||||
set(BINARY 64)
|
||||
else ()
|
||||
set(BINARY 32)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if(BINARY EQUAL 64)
|
||||
set(BINARY64 1)
|
||||
else()
|
||||
set(BINARY32 1)
|
||||
endif()
|
||||
|
||||
if (X86_64 OR X86)
|
||||
file(WRITE ${PROJECT_BINARY_DIR}/avx512.tmp "#include <immintrin.h>\n\nint main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }")
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -march=skylake-avx512 -c -v -o ${PROJECT_BINARY_DIR}/avx512.o -x c ${PROJECT_BINARY_DIR}/avx512.tmp OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_AVX512)
|
||||
if (NO_AVX512 EQUAL 1)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512")
|
||||
endif()
|
||||
file(REMOVE "avx512.tmp" "avx512.o")
|
||||
endif()
|
||||
|
||||
@@ -89,7 +89,7 @@ function(AllCombinations list_in absent_codes_in)
|
||||
set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
|
||||
endfunction ()
|
||||
|
||||
# generates object files for each of the sources, using the BLAS naming scheme to pass the function name as a preprocessor definition
|
||||
# generates object files for each of the sources, using the BLAS naming scheme to pass the funciton name as a preprocessor definition
|
||||
# @param sources_in the source files to build from
|
||||
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
|
||||
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
|
||||
@@ -202,8 +202,6 @@ function(GenerateNamedObjects sources_in)
|
||||
if (use_cblas)
|
||||
set(obj_name "cblas_${obj_name}")
|
||||
list(APPEND obj_defines "CBLAS")
|
||||
elseif (NOT "${obj_name}" MATCHES "${ARCH_SUFFIX}")
|
||||
set(obj_name "${obj_name}${ARCH_SUFFIX}")
|
||||
endif ()
|
||||
|
||||
list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")
|
||||
@@ -236,9 +234,7 @@ function(GenerateNamedObjects sources_in)
|
||||
|
||||
string(REPLACE ";" "\n#define " define_source "${obj_defines}")
|
||||
string(REPLACE "=" " " define_source "${define_source}")
|
||||
file(WRITE ${new_source_file}.tmp "#define ${define_source}\n#include \"${old_source_file}\"")
|
||||
configure_file(${new_source_file}.tmp ${new_source_file} COPYONLY)
|
||||
file(REMOVE ${new_source_file}.tmp)
|
||||
file(WRITE ${new_source_file} "#define ${define_source}\n#include \"${old_source_file}\"")
|
||||
list(APPEND SRC_LIST_OUT ${new_source_file})
|
||||
|
||||
endforeach ()
|
||||
|
||||
141
common.h
141
common.h
@@ -85,8 +85,6 @@ extern "C" {
|
||||
|
||||
#if !defined(_MSC_VER)
|
||||
#include <unistd.h>
|
||||
#elif _MSC_VER < 1900
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
#include <time.h>
|
||||
|
||||
@@ -95,7 +93,7 @@ extern "C" {
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_ANDROID)
|
||||
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD)
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
@@ -107,10 +105,6 @@ extern "C" {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef OS_HAIKU
|
||||
#define NO_SYSV_IPC
|
||||
#endif
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
#ifdef ATOM
|
||||
#define GOTO_ATOM ATOM
|
||||
@@ -131,7 +125,7 @@ extern "C" {
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
#if defined(SMP) || defined(USE_LOCKING)
|
||||
#ifdef SMP
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
#endif
|
||||
@@ -185,7 +179,7 @@ extern "C" {
|
||||
|
||||
#define ALLOCA_ALIGN 63UL
|
||||
|
||||
#define NUM_BUFFERS MAX(50,(MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER))
|
||||
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
|
||||
|
||||
#ifdef NEEDBUNDERSCORE
|
||||
#define BLASFUNC(FUNC) FUNC##_
|
||||
@@ -200,7 +194,7 @@ extern "C" {
|
||||
#error "You can't specify both LOCK operation!"
|
||||
#endif
|
||||
|
||||
#if defined(SMP) || defined(USE_LOCKING)
|
||||
#ifdef SMP
|
||||
#define USE_PTHREAD_LOCK
|
||||
#undef USE_PTHREAD_SPINLOCK
|
||||
#endif
|
||||
@@ -259,14 +253,8 @@ typedef unsigned long BLASULONG;
|
||||
|
||||
#ifdef USE64BITINT
|
||||
typedef BLASLONG blasint;
|
||||
#if defined(OS_WINDOWS) && defined(__64BIT__)
|
||||
#define blasabs(x) llabs(x)
|
||||
#else
|
||||
#define blasabs(x) labs(x)
|
||||
#endif
|
||||
#else
|
||||
typedef int blasint;
|
||||
#define blasabs(x) abs(x)
|
||||
#endif
|
||||
#else
|
||||
#ifdef USE64BITINT
|
||||
@@ -344,25 +332,12 @@ typedef int blasint;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef POWER8
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef POWER9
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifdef PILEDRIVER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
|
||||
/*
|
||||
#ifdef STEAMROLLER
|
||||
@@ -421,10 +396,6 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
||||
#include "common_sparc.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_MIPS
|
||||
#include "common_mips.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_MIPS64
|
||||
#include "common_mips64.h"
|
||||
#endif
|
||||
@@ -437,23 +408,14 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
||||
#include "common_arm64.h"
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_ZARCH
|
||||
#include "common_zarch.h"
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#ifdef OS_WINDOWSSTORE
|
||||
#ifdef OS_WINDOWS
|
||||
typedef char env_var_t[MAX_PATH];
|
||||
#define readenv(p, n) 0
|
||||
#else
|
||||
#if defined(OS_WINDOWS) && !defined(OS_CYGWIN_NT)
|
||||
typedef char env_var_t[MAX_PATH];
|
||||
#define readenv(p, n) GetEnvironmentVariable((LPCTSTR)(n), (LPTSTR)(p), sizeof(p))
|
||||
#define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
|
||||
#else
|
||||
typedef char* env_var_t;
|
||||
#define readenv(p, n) ((p)=getenv(n))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
|
||||
#ifdef _POSIX_MONOTONIC_CLOCK
|
||||
@@ -512,33 +474,6 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
||||
extension since version 3.0. If neither are available, use a compatible
|
||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER)
|
||||
#define OPENBLAS_COMPLEX_C99
|
||||
#ifndef __cplusplus
|
||||
#include <complex.h>
|
||||
#endif
|
||||
typedef float _Complex openblas_complex_float;
|
||||
typedef double _Complex openblas_complex_double;
|
||||
typedef xdouble _Complex openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#else
|
||||
#define OPENBLAS_COMPLEX_STRUCT
|
||||
typedef struct { float real, imag; } openblas_complex_float;
|
||||
typedef struct { double real, imag; } openblas_complex_double;
|
||||
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_double(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "param.h"
|
||||
#include "common_param.h"
|
||||
|
||||
@@ -568,6 +503,31 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
#include <stdio.h>
|
||||
#endif // NOINCLUDE
|
||||
|
||||
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
||||
extension since version 3.0. If neither are available, use a compatible
|
||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT)))
|
||||
#define OPENBLAS_COMPLEX_C99
|
||||
#ifndef __cplusplus
|
||||
#include <complex.h>
|
||||
#endif
|
||||
typedef float _Complex openblas_complex_float;
|
||||
typedef double _Complex openblas_complex_double;
|
||||
typedef xdouble _Complex openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#else
|
||||
#define OPENBLAS_COMPLEX_STRUCT
|
||||
typedef struct { float real, imag; } openblas_complex_float;
|
||||
typedef struct { double real, imag; } openblas_complex_double;
|
||||
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_double(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
|
||||
#endif
|
||||
|
||||
#ifdef XDOUBLE
|
||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
|
||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
|
||||
@@ -580,13 +540,8 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
#endif
|
||||
|
||||
#if defined(C_PGI) || defined(C_SUN)
|
||||
#if defined(__STDC_IEC_559_COMPLEX__)
|
||||
#define CREAL(X) creal(X)
|
||||
#define CIMAG(X) cimag(X)
|
||||
#else
|
||||
#define CREAL(X) (*((FLOAT *)&X + 0))
|
||||
#define CIMAG(X) (*((FLOAT *)&X + 1))
|
||||
#endif
|
||||
#define CREAL(X) (*((FLOAT *)&X + 0))
|
||||
#define CIMAG(X) (*((FLOAT *)&X + 1))
|
||||
#else
|
||||
#ifdef OPENBLAS_COMPLEX_STRUCT
|
||||
#define CREAL(Z) ((Z).real)
|
||||
@@ -659,29 +614,8 @@ void gotoblas_profile_init(void);
|
||||
void gotoblas_profile_quit(void);
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
|
||||
#ifndef C_MSVC
|
||||
int omp_in_parallel(void);
|
||||
int omp_get_num_procs(void);
|
||||
#else
|
||||
__declspec(dllimport) int __cdecl omp_in_parallel(void);
|
||||
__declspec(dllimport) int __cdecl omp_get_num_procs(void);
|
||||
#endif
|
||||
|
||||
#if (__STDC_VERSION__ >= 201112L)
|
||||
#if defined(C_GCC) && ( __GNUC__ < 7)
|
||||
// workaround for GCC bug 65467
|
||||
#ifndef _Atomic
|
||||
#define _Atomic volatile
|
||||
#endif
|
||||
#endif
|
||||
#include <stdatomic.h>
|
||||
#else
|
||||
#ifndef _Atomic
|
||||
#define _Atomic volatile
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#else
|
||||
#ifdef __ELF__
|
||||
int omp_in_parallel (void) __attribute__ ((weak));
|
||||
@@ -694,11 +628,7 @@ static __inline void blas_unlock(volatile BLASULONG *address){
|
||||
*address = 0;
|
||||
}
|
||||
|
||||
#ifdef OS_WINDOWSSTORE
|
||||
static __inline int readenv_atoi(char *env) {
|
||||
return 0;
|
||||
}
|
||||
#else
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
static __inline int readenv_atoi(char *env) {
|
||||
env_var_t p;
|
||||
@@ -713,7 +643,7 @@ static __inline int readenv_atoi(char *env) {
|
||||
return(0);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
|
||||
|
||||
@@ -797,7 +727,6 @@ typedef struct {
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#include "common_stackalloc.h"
|
||||
#if 0
|
||||
#include "symcopy.h"
|
||||
#endif
|
||||
|
||||
@@ -105,6 +105,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
#define PROLOGUE \
|
||||
.arm ;\
|
||||
.global REALNAME ;\
|
||||
.func REALNAME ;\
|
||||
REALNAME:
|
||||
|
||||
#define EPILOGUE
|
||||
|
||||
@@ -39,11 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#ifdef F_INTERFACE_FLANG
|
||||
#define RETURN_BY_STACK
|
||||
#else
|
||||
#define RETURN_BY_COMPLEX
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#define CDOTC_K cdotc_k
|
||||
#define CNRM2_K cnrm2_k
|
||||
#define CSCAL_K cscal_k
|
||||
#define CSUM_K csum_k
|
||||
#define CSWAP_K cswap_k
|
||||
#define CROT_K csrot_k
|
||||
|
||||
@@ -250,7 +249,6 @@
|
||||
#define CDOTC_K gotoblas -> cdotc_k
|
||||
#define CNRM2_K gotoblas -> cnrm2_k
|
||||
#define CSCAL_K gotoblas -> cscal_k
|
||||
#define CSUM_K gotoblas -> csum_k
|
||||
#define CSWAP_K gotoblas -> cswap_k
|
||||
#define CROT_K gotoblas -> csrot_k
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#define DDOTC_K ddot_k
|
||||
#define DNRM2_K dnrm2_k
|
||||
#define DSCAL_K dscal_k
|
||||
#define DSUM_K dsum_k
|
||||
#define DSWAP_K dswap_k
|
||||
#define DROT_K drot_k
|
||||
|
||||
@@ -175,7 +174,6 @@
|
||||
#define DDOTC_K gotoblas -> ddot_k
|
||||
#define DNRM2_K gotoblas -> dnrm2_k
|
||||
#define DSCAL_K gotoblas -> dscal_k
|
||||
#define DSUM_K gotoblas -> dsum_k
|
||||
#define DSWAP_K gotoblas -> dswap_k
|
||||
#define DROT_K gotoblas -> drot_k
|
||||
|
||||
|
||||
@@ -122,13 +122,6 @@ xdouble BLASFUNC(qasum) (blasint *, xdouble *, blasint *);
|
||||
double BLASFUNC(dzasum)(blasint *, double *, blasint *);
|
||||
xdouble BLASFUNC(qxasum)(blasint *, xdouble *, blasint *);
|
||||
|
||||
FLOATRET BLASFUNC(ssum) (blasint *, float *, blasint *);
|
||||
FLOATRET BLASFUNC(scsum)(blasint *, float *, blasint *);
|
||||
double BLASFUNC(dsum) (blasint *, double *, blasint *);
|
||||
xdouble BLASFUNC(qsum) (blasint *, xdouble *, blasint *);
|
||||
double BLASFUNC(dzsum)(blasint *, double *, blasint *);
|
||||
xdouble BLASFUNC(qxsum)(blasint *, xdouble *, blasint *);
|
||||
|
||||
blasint BLASFUNC(isamax)(blasint *, float *, blasint *);
|
||||
blasint BLASFUNC(idamax)(blasint *, double *, blasint *);
|
||||
blasint BLASFUNC(iqamax)(blasint *, xdouble *, blasint *);
|
||||
|
||||
@@ -100,13 +100,6 @@ float casum_k (BLASLONG, float *, BLASLONG);
|
||||
double zasum_k (BLASLONG, double *, BLASLONG);
|
||||
xdouble xasum_k (BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
float ssum_k (BLASLONG, float *, BLASLONG);
|
||||
double dsum_k (BLASLONG, double *, BLASLONG);
|
||||
xdouble qsum_k (BLASLONG, xdouble *, BLASLONG);
|
||||
float csum_k (BLASLONG, float *, BLASLONG);
|
||||
double zsum_k (BLASLONG, double *, BLASLONG);
|
||||
xdouble xsum_k (BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
float samax_k (BLASLONG, float *, BLASLONG);
|
||||
double damax_k (BLASLONG, double *, BLASLONG);
|
||||
xdouble qamax_k (BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
@@ -47,14 +47,6 @@ __global__ void cuda_dgemm_kernel(int, int, int, double *, double *, double *);
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern void sgemm_kernel_direct(BLASLONG M, BLASLONG N, BLASLONG K,
|
||||
float * A, BLASLONG strideA,
|
||||
float * B, BLASLONG strideB,
|
||||
float * R, BLASLONG strideR);
|
||||
|
||||
extern int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
|
||||
|
||||
|
||||
int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int dgemm_beta(BLASLONG, BLASLONG, BLASLONG, double,
|
||||
|
||||
@@ -70,7 +70,7 @@ extern long int syscall (long int __sysno, ...);
|
||||
static inline int my_mbind(void *addr, unsigned long len, int mode,
|
||||
unsigned long *nodemask, unsigned long maxnode,
|
||||
unsigned flags) {
|
||||
#if defined (__LSB_VERSION__) || defined(ARCH_ZARCH)
|
||||
#if defined (__LSB_VERSION__)
|
||||
// So far, LSB (Linux Standard Base) don't support syscall().
|
||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
||||
return 0;
|
||||
@@ -90,7 +90,7 @@ static inline int my_mbind(void *addr, unsigned long len, int mode,
|
||||
}
|
||||
|
||||
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
|
||||
#if defined (__LSB_VERSION__) || defined(ARCH_ZARCH)
|
||||
#if defined (__LSB_VERSION__)
|
||||
// So far, LSB (Linux Standard Base) don't support syscall().
|
||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
||||
return 0;
|
||||
|
||||
@@ -66,7 +66,6 @@
|
||||
#define DOTC_K QDOTC_K
|
||||
#define NRM2_K QNRM2_K
|
||||
#define SCAL_K QSCAL_K
|
||||
#define SUM_K QSUM_K
|
||||
#define SWAP_K QSWAP_K
|
||||
#define ROT_K QROT_K
|
||||
|
||||
@@ -357,7 +356,6 @@
|
||||
#define DOTC_K DDOTC_K
|
||||
#define NRM2_K DNRM2_K
|
||||
#define SCAL_K DSCAL_K
|
||||
#define SUM_K DSUM_K
|
||||
#define SWAP_K DSWAP_K
|
||||
#define ROT_K DROT_K
|
||||
|
||||
@@ -660,7 +658,6 @@
|
||||
#define DOTC_K SDOTC_K
|
||||
#define NRM2_K SNRM2_K
|
||||
#define SCAL_K SSCAL_K
|
||||
#define SUM_K SSUM_K
|
||||
#define SWAP_K SSWAP_K
|
||||
#define ROT_K SROT_K
|
||||
|
||||
@@ -965,7 +962,6 @@
|
||||
#define DOTC_K XDOTC_K
|
||||
#define NRM2_K XNRM2_K
|
||||
#define SCAL_K XSCAL_K
|
||||
#define SUM_K XSUM_K
|
||||
#define SWAP_K XSWAP_K
|
||||
#define ROT_K XROT_K
|
||||
|
||||
@@ -1367,7 +1363,6 @@
|
||||
#define DOTC_K ZDOTC_K
|
||||
#define NRM2_K ZNRM2_K
|
||||
#define SCAL_K ZSCAL_K
|
||||
#define SUM_K ZSUM_K
|
||||
#define SWAP_K ZSWAP_K
|
||||
#define ROT_K ZROT_K
|
||||
|
||||
@@ -1790,7 +1785,6 @@
|
||||
#define DOTC_K CDOTC_K
|
||||
#define NRM2_K CNRM2_K
|
||||
#define SCAL_K CSCAL_K
|
||||
#define SUM_K CSUM_K
|
||||
#define SWAP_K CSWAP_K
|
||||
#define ROT_K CROT_K
|
||||
|
||||
@@ -2199,7 +2193,7 @@
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)
|
||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64)
|
||||
extern BLASLONG gemm_offset_a;
|
||||
extern BLASLONG gemm_offset_b;
|
||||
extern BLASLONG sgemm_p;
|
||||
|
||||
103
common_mips.h
103
common_mips.h
@@ -1,103 +0,0 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef COMMON_MIPS
|
||||
#define COMMON_MIPS
|
||||
|
||||
#define MB __sync_synchronize()
|
||||
#define WMB __sync_synchronize()
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#define RETURN_BY_COMPLEX
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
static inline unsigned int rpcc(void){
|
||||
unsigned long ret;
|
||||
|
||||
__asm__ __volatile__(".set push \n"
|
||||
"rdhwr %0, $30 \n"
|
||||
".set pop" : "=r"(ret) : : "memory");
|
||||
|
||||
return ret;
|
||||
}
|
||||
#define RPCC_DEFINED
|
||||
|
||||
static inline int blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
}
|
||||
|
||||
#define GET_IMAGE(res)
|
||||
|
||||
#define GET_IMAGE_CANCEL
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef F_INTERFACE
|
||||
#define REALNAME ASMNAME
|
||||
#else
|
||||
#define REALNAME ASMFNAME
|
||||
#endif
|
||||
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#define PROLOGUE \
|
||||
.arm ;\
|
||||
.global REALNAME ;\
|
||||
REALNAME:
|
||||
|
||||
#define EPILOGUE
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define SEEK_ADDRESS
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE ( 4 << 10)
|
||||
#endif
|
||||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#define BUFFER_SIZE (16 << 20)
|
||||
|
||||
|
||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -71,16 +71,38 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#ifndef COMMON_MIPS64
|
||||
#define COMMON_MIPS64
|
||||
|
||||
#define MB __sync_synchronize()
|
||||
#define WMB __sync_synchronize()
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
static void INLINE blas_lock(volatile unsigned long *address){
|
||||
|
||||
long int ret, val = 1;
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: ll %0, %3\n"
|
||||
" ori %2, %0, 1\n"
|
||||
" sc %2, %1\n"
|
||||
" beqz %2, 1b\n"
|
||||
" andi %2, %0, 1\n"
|
||||
" sync\n"
|
||||
: "=&r" (val), "=m" (address), "=&r" (ret)
|
||||
: "m" (address)
|
||||
: "memory");
|
||||
|
||||
} while (ret);
|
||||
}
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
static inline unsigned int rpcc(void){
|
||||
unsigned long ret;
|
||||
|
||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
||||
// unsigned long long tmp;
|
||||
//__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory");
|
||||
//ret=tmp;
|
||||
@@ -89,12 +111,19 @@ static inline unsigned int rpcc(void){
|
||||
"rdhwr %0, $2\n"
|
||||
".set pop": "=r"(ret):: "memory");
|
||||
|
||||
#else
|
||||
__asm__ __volatile__(".set push \n"
|
||||
".set mips32r2\n"
|
||||
"rdhwr %0, $30 \n"
|
||||
".set pop" : "=r"(ret) : : "memory");
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
#define RPCC_DEFINED
|
||||
|
||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
||||
#ifndef NO_AFFINITY
|
||||
//#define WHEREAMI
|
||||
#define WHEREAMI
|
||||
static inline int WhereAmI(void){
|
||||
int ret=0;
|
||||
__asm__ __volatile__(".set push \n"
|
||||
@@ -105,6 +134,7 @@ static inline int WhereAmI(void){
|
||||
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static inline int blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
|
||||
@@ -63,7 +63,6 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
||||
|
||||
float (*snrm2_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*sasum_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*ssum_k) (BLASLONG, float *, BLASLONG);
|
||||
int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
@@ -155,7 +154,6 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
||||
|
||||
double (*dnrm2_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*dasum_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*dsum_k) (BLASLONG, double *, BLASLONG);
|
||||
int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
|
||||
@@ -247,7 +245,6 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
xdouble (*qnrm2_k) (BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble (*qasum_k) (BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble (*qsum_k) (BLASLONG, xdouble *, BLASLONG);
|
||||
int (*qcopy_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble (*qdot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
int (*qrot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);
|
||||
@@ -335,10 +332,9 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
||||
|
||||
float (*cnrm2_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*casum_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*csum_k) (BLASLONG, float *, BLASLONG);
|
||||
int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
float _Complex (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
float _Complex (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
|
||||
|
||||
int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
@@ -499,10 +495,9 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
|
||||
|
||||
double (*znrm2_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*zasum_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*zsum_k) (BLASLONG, double *, BLASLONG);
|
||||
int (*zcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
openblas_complex_double (*zdotu_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
openblas_complex_double (*zdotc_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
double _Complex (*zdotu_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
double _Complex (*zdotc_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int (*zdrot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
|
||||
|
||||
int (*zaxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
@@ -665,10 +660,9 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
xdouble (*xnrm2_k) (BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble (*xasum_k) (BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble (*xsum_k) (BLASLONG, xdouble *, BLASLONG);
|
||||
int (*xcopy_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
openblas_complex_xdouble (*xdotu_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
openblas_complex_xdouble (*xdotc_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble _Complex (*xdotu_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble _Complex (*xdotc_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
int (*xqrot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);
|
||||
|
||||
int (*xaxpy_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
@@ -894,7 +888,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
|
||||
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
|
||||
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
|
||||
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
int (*zgeadd_k) (BLASLONG, BLASLONG, float, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
} gotoblas_t;
|
||||
|
||||
@@ -1200,6 +1194,8 @@ extern gotoblas_t *gotoblas;
|
||||
#define XGEMM_DEFAULT_UNROLL_N 2
|
||||
#endif
|
||||
|
||||
#define GEMM_THREAD gemm_thread_m
|
||||
|
||||
#ifndef GEMM_THREAD
|
||||
#define GEMM_THREAD gemm_thread_n
|
||||
#endif
|
||||
|
||||
@@ -39,13 +39,8 @@
|
||||
#ifndef COMMON_POWER
|
||||
#define COMMON_POWER
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#define MB __asm__ __volatile__ ("eieio":::"memory")
|
||||
#define WMB __asm__ __volatile__ ("eieio":::"memory")
|
||||
#else
|
||||
#define MB __asm__ __volatile__ ("sync")
|
||||
#define WMB __asm__ __volatile__ ("sync")
|
||||
#endif
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
@@ -241,7 +236,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
#define HAVE_PREFETCH
|
||||
#endif
|
||||
|
||||
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9) || ( defined(PPC970) && ( defined(OS_DARWIN) || defined(OS_FREEBSD) ) )
|
||||
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL)
|
||||
#define DCBT_ARG 0
|
||||
#else
|
||||
#define DCBT_ARG 8
|
||||
@@ -263,13 +258,6 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
#define L1_PREFETCH dcbtst
|
||||
#endif
|
||||
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#define L1_DUALFETCH
|
||||
#define L1_PREFETCHSIZE (16 + 128 * 100)
|
||||
#define L1_PREFETCH dcbtst
|
||||
#endif
|
||||
|
||||
#
|
||||
#ifndef L1_PREFETCH
|
||||
#define L1_PREFETCH dcbt
|
||||
#endif
|
||||
@@ -499,7 +487,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD)
|
||||
#ifdef OS_LINUX
|
||||
#ifndef __64BIT__
|
||||
#define PROLOGUE \
|
||||
.section .text;\
|
||||
@@ -598,14 +586,9 @@ REALNAME:;\
|
||||
#ifndef __64BIT__
|
||||
#define PROLOGUE \
|
||||
.machine "any";\
|
||||
.toc;\
|
||||
.globl .REALNAME;\
|
||||
.globl REALNAME;\
|
||||
.csect REALNAME[DS],3;\
|
||||
REALNAME:;\
|
||||
.long .REALNAME, TOC[tc0], 0;\
|
||||
.csect .text[PR],5;\
|
||||
.REALNAME:
|
||||
.REALNAME:;
|
||||
|
||||
#define EPILOGUE \
|
||||
_section_.text:;\
|
||||
@@ -616,14 +599,9 @@ _section_.text:;\
|
||||
|
||||
#define PROLOGUE \
|
||||
.machine "any";\
|
||||
.toc;\
|
||||
.globl .REALNAME;\
|
||||
.globl REALNAME;\
|
||||
.csect REALNAME[DS],3;\
|
||||
REALNAME:;\
|
||||
.llong .REALNAME, TOC[tc0], 0;\
|
||||
.csect .text[PR], 5;\
|
||||
.REALNAME:
|
||||
.REALNAME:;
|
||||
|
||||
#define EPILOGUE \
|
||||
_section_.text:;\
|
||||
@@ -784,7 +762,7 @@ Lmcount$lazy_ptr:
|
||||
|
||||
#define HALT mfspr r0, 1023
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD)
|
||||
#ifdef OS_LINUX
|
||||
#if defined(PPC440) || defined(PPC440FP2)
|
||||
#undef MAX_CPU_NUMBER
|
||||
#define MAX_CPU_NUMBER 1
|
||||
@@ -812,8 +790,6 @@ Lmcount$lazy_ptr:
|
||||
#define BUFFER_SIZE ( 2 << 20)
|
||||
#elif defined(PPC440FP2)
|
||||
#define BUFFER_SIZE ( 16 << 20)
|
||||
#elif defined(POWER8) || defined(POWER9)
|
||||
#define BUFFER_SIZE ( 64 << 20)
|
||||
#else
|
||||
#define BUFFER_SIZE ( 16 << 20)
|
||||
#endif
|
||||
@@ -829,7 +805,7 @@ Lmcount$lazy_ptr:
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD)
|
||||
#ifdef OS_LINUX
|
||||
#ifndef __64BIT__
|
||||
#define FRAMESLOT(X) (((X) * 4) + 8)
|
||||
#else
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#define QDOTC_K qdot_k
|
||||
#define QNRM2_K qnrm2_k
|
||||
#define QSCAL_K qscal_k
|
||||
#define QSUM_K qsum_k
|
||||
#define QSWAP_K qswap_k
|
||||
#define QROT_K qrot_k
|
||||
|
||||
@@ -162,7 +161,6 @@
|
||||
#define QDOTC_K gotoblas -> qdot_k
|
||||
#define QNRM2_K gotoblas -> qnrm2_k
|
||||
#define QSCAL_K gotoblas -> qscal_k
|
||||
#define QSUM_K gotoblas -> qsum_k
|
||||
#define QSWAP_K gotoblas -> qswap_k
|
||||
#define QROT_K gotoblas -> qrot_k
|
||||
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
#define ISMAX_K ismax_k
|
||||
#define ISMIN_K ismin_k
|
||||
#define SASUM_K sasum_k
|
||||
#define SSUM_K ssum_k
|
||||
#define SAXPYU_K saxpy_k
|
||||
#define SAXPYC_K saxpy_k
|
||||
#define SCOPY_K scopy_k
|
||||
@@ -171,7 +170,6 @@
|
||||
#define ISMAX_K gotoblas -> ismax_k
|
||||
#define ISMIN_K gotoblas -> ismin_k
|
||||
#define SASUM_K gotoblas -> sasum_k
|
||||
#define SSUM_K gotoblas -> ssum_k
|
||||
#define SAXPYU_K gotoblas -> saxpy_k
|
||||
#define SAXPYC_K gotoblas -> saxpy_k
|
||||
#define SCOPY_K gotoblas -> scopy_k
|
||||
|
||||
@@ -1,74 +0,0 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*******************************************************************************/
|
||||
|
||||
/*
 * Stack-smashing canary shared by STACK_ALLOC and STACK_FREE.
 *
 * While STACK_ALLOC_PROTECT is defined:
 *   STACK_ALLOC_PROTECT_SET   declares a volatile int named stack_check and
 *                             initialises it to the marker 0x7fc01234;
 *   STACK_ALLOC_PROTECT_CHECK asserts that stack_check still holds the marker.
 * Both expansions carry their own trailing semicolon.
 * Without STACK_ALLOC_PROTECT both macros expand to nothing.
 */
#define STACK_ALLOC_PROTECT

#if defined(STACK_ALLOC_PROTECT)
#include <assert.h>
#define STACK_ALLOC_PROTECT_SET   volatile int stack_check = 0x7fc01234;
#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7fc01234);
#else
#define STACK_ALLOC_PROTECT_SET
#define STACK_ALLOC_PROTECT_CHECK
#endif
|
||||
|
||||
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0

/*
 * STACK_ALLOC(SIZE, TYPE, BUFFER)
 *
 * Points BUFFER at scratch storage, using the stack when the request is small
 * enough.  Stack allocation is much faster than blas_memory_alloc or malloc,
 * particularly when OpenBLAS is used from a multi-threaded application.
 *
 * Expansion, in order:
 *   - stack_alloc_size is set to SIZE (an element count).  It is volatile
 *     because some kernels (e.g. dgemv_n.S) do not restore all registers.
 *   - if SIZE exceeds MAX_STACK_ALLOC / sizeof(TYPE), stack_alloc_size is
 *     reset to 0, which selects the blas_memory_alloc path;
 *   - STACK_ALLOC_PROTECT_SET plants the stack canary;
 *   - stack_buffer is declared with stack_alloc_size elements (1 when the
 *     size is 0, to avoid declaring a zero-length array), aligned to 0x20;
 *   - BUFFER receives stack_buffer, or blas_memory_alloc(1) when the size is 0.
 *
 * The macro declares stack_alloc_size and stack_buffer in the calling scope;
 * STACK_FREE reads stack_alloc_size from that same scope.
 *
 * SIZE must be carefully chosen to be:
 *   - as small as possible, to maximise the number of stack allocations;
 *   - large enough to support all architectures and kernels.
 * Choosing a SIZE that is too small will lead to stack smashing.
 */
#define STACK_ALLOC(SIZE, TYPE, BUFFER)                                        \
  volatile int stack_alloc_size = SIZE;                                        \
  if (stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE)) stack_alloc_size = 0; \
  STACK_ALLOC_PROTECT_SET                                                      \
  TYPE stack_buffer[stack_alloc_size ? stack_alloc_size : 1]                   \
      __attribute__((aligned(0x20)));                                          \
  BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);

#else

/* Original OpenBLAS/GotoBLAS code path: always take the buffer from blas_memory_alloc. */
#define STACK_ALLOC(SIZE, TYPE, BUFFER) BUFFER = (TYPE *)blas_memory_alloc(1)

#endif
|
||||
|
||||
|
||||
/*
 * STACK_FREE(BUFFER)
 *
 * Counterpart of STACK_ALLOC; must be used in the scope where STACK_ALLOC was
 * expanded and must be followed by a semicolon in both configurations.
 *
 * With stack allocation enabled it first runs STACK_ALLOC_PROTECT_CHECK and
 * then hands BUFFER to blas_memory_free only when stack_alloc_size is 0, i.e.
 * when STACK_ALLOC took the buffer from blas_memory_alloc.
 *
 * The body is wrapped in do { ... } while (0) so the macro expands to exactly
 * one statement: it is safe as the unbraced body of an if/else and cannot
 * capture a following `else`.
 */
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
#define STACK_FREE(BUFFER)      \
  do {                          \
    STACK_ALLOC_PROTECT_CHECK   \
    if (!stack_alloc_size)      \
      blas_memory_free(BUFFER); \
  } while (0)
#else
#define STACK_FREE(BUFFER) blas_memory_free(BUFFER)
#endif
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#define XDOTC_K xdotc_k
|
||||
#define XNRM2_K xnrm2_k
|
||||
#define XSCAL_K xscal_k
|
||||
#define XSUM_K xsum_k
|
||||
#define XSWAP_K xswap_k
|
||||
#define XROT_K xqrot_k
|
||||
|
||||
@@ -228,7 +227,6 @@
|
||||
#define XDOTC_K gotoblas -> xdotc_k
|
||||
#define XNRM2_K gotoblas -> xnrm2_k
|
||||
#define XSCAL_K gotoblas -> xscal_k
|
||||
#define XSUM_K gotoblas -> xsum_k
|
||||
#define XSWAP_K gotoblas -> xswap_k
|
||||
#define XROT_K gotoblas -> xqrot_k
|
||||
|
||||
|
||||
27
common_x86.h
27
common_x86.h
@@ -41,10 +41,6 @@
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
#ifdef C_MSVC
|
||||
#include <intrin.h>
|
||||
#endif
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
@@ -62,7 +58,7 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
// use intrinsic instead of inline assembly
|
||||
ret = _InterlockedExchange((volatile LONG *)address, 1);
|
||||
ret = _InterlockedExchange(address, 1);
|
||||
// inline assembly
|
||||
/*__asm {
|
||||
mov eax, address
|
||||
@@ -174,20 +170,13 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
|
||||
if (y <= 1) return x;
|
||||
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
result = x/y;
|
||||
return result;
|
||||
#else
|
||||
#if (MAX_CPU_NUMBER > 64)
|
||||
if ( y > 64) {
|
||||
result = x/y;
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
y = blas_quick_divide_table[y];
|
||||
|
||||
__asm__ __volatile__ ("mull %0" :"=d" (result), "+a"(x): "0" (y));
|
||||
#if defined(_MSC_VER) && !defined(__clang__)
|
||||
(void*)result;
|
||||
return x*y;
|
||||
#else
|
||||
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
|
||||
|
||||
return result;
|
||||
#endif
|
||||
@@ -214,7 +203,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
#endif
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
//Enable some optimization for barcelona.
|
||||
//Enable some optimazation for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
#endif
|
||||
|
||||
@@ -333,7 +322,7 @@ REALNAME:
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(__ELF__)
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__)
|
||||
#define PROLOGUE \
|
||||
.text; \
|
||||
.align 16; \
|
||||
|
||||
@@ -60,13 +60,8 @@
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define MB do { __asm__ __volatile__("": : :"memory"); } while (0)
|
||||
#define WMB do { __asm__ __volatile__("": : :"memory"); } while (0)
|
||||
#else
|
||||
#define MB do {} while (0)
|
||||
#define WMB do {} while (0)
|
||||
#endif
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
@@ -129,8 +124,7 @@ static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||
*ecx=cpuinfo[2];
|
||||
*edx=cpuinfo[3];
|
||||
#else
|
||||
__asm__ __volatile__("mov $0, %%ecx;"
|
||||
"cpuid"
|
||||
__asm__ __volatile__("cpuid"
|
||||
: "=a" (*eax),
|
||||
"=b" (*ebx),
|
||||
"=c" (*ecx),
|
||||
@@ -202,16 +196,9 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
|
||||
if (y <= 1) return x;
|
||||
|
||||
#if (MAX_CPU_NUMBER > 64)
|
||||
if (y > 64) {
|
||||
result = x / y;
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
y = blas_quick_divide_table[y];
|
||||
|
||||
__asm__ __volatile__ ("mull %0" :"=d" (result), "+a"(x) : "0" (y));
|
||||
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -258,10 +245,6 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
#define RETURN_BY_STACK
|
||||
#endif
|
||||
|
||||
#ifdef F_INTERFACE_FLANG
|
||||
#define RETURN_BY_STACK
|
||||
#endif
|
||||
|
||||
#ifdef F_INTERFACE_PGI
|
||||
#define RETURN_BY_STACK
|
||||
#endif
|
||||
@@ -277,7 +260,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
#ifdef ASSEMBLER
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
//Enable some optimization for barcelona.
|
||||
//Enable some optimazation for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
#endif
|
||||
|
||||
@@ -413,10 +396,10 @@ REALNAME:
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
#define EPILOGUE .end
|
||||
#define EPILOGUE .end REALNAME
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(__ELF__) || defined(C_PGI)
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI)
|
||||
#define PROLOGUE \
|
||||
.text; \
|
||||
.align 512; \
|
||||
|
||||
@@ -19,7 +19,6 @@
|
||||
#define ZDOTC_K zdotc_k
|
||||
#define ZNRM2_K znrm2_k
|
||||
#define ZSCAL_K zscal_k
|
||||
#define ZSUM_K zsum_k
|
||||
#define ZSWAP_K zswap_k
|
||||
#define ZROT_K zdrot_k
|
||||
|
||||
@@ -250,7 +249,6 @@
|
||||
#define ZDOTC_K gotoblas -> zdotc_k
|
||||
#define ZNRM2_K gotoblas -> znrm2_k
|
||||
#define ZSCAL_K gotoblas -> zscal_k
|
||||
#define ZSUM_K gotoblas -> zsum_k
|
||||
#define ZSWAP_K gotoblas -> zswap_k
|
||||
#define ZROT_K gotoblas -> zdrot_k
|
||||
|
||||
|
||||
140
common_zarch.h
140
common_zarch.h
@@ -1,140 +0,0 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011-2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
|
||||
#ifndef COMMON_ZARCH
|
||||
#define COMMON_ZARCH
|
||||
|
||||
#define MB
|
||||
//__asm__ __volatile__ ("dmb ish" : : : "memory")
|
||||
#define WMB
|
||||
//__asm__ __volatile__ ("dmb ishst" : : : "memory")
|
||||
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#define RETURN_BY_COMPLEX
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
/*
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
BLASULONG ret;
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"mov x4, #1 \n\t"
|
||||
"1: \n\t"
|
||||
"ldaxr x2, [%1] \n\t"
|
||||
"cbnz x2, 1b \n\t"
|
||||
"2: \n\t"
|
||||
"stxr w3, x4, [%1] \n\t"
|
||||
"cbnz w3, 1b \n\t"
|
||||
"mov %0, #0 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
: "memory", "x2" , "x3", "x4"
|
||||
|
||||
|
||||
);
|
||||
|
||||
|
||||
} while (ret);
|
||||
|
||||
}
|
||||
*/
|
||||
//#define BLAS_LOCK_DEFINED
|
||||
|
||||
|
||||
|
||||
/*
 * Returns the integer quotient x / y using the plain C division operator
 * (no lookup table on this target).  y is not checked against zero here.
 */
static inline int blas_quickdivide(blasint x, blasint y){
  const blasint quotient = x / y;
  return quotient;
}
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
|
||||
#else
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
|
||||
#endif
|
||||
|
||||
#define GET_IMAGE_CANCEL
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef F_INTERFACE
|
||||
#define REALNAME ASMNAME
|
||||
#else
|
||||
#define REALNAME ASMFNAME
|
||||
#endif
|
||||
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#define PROLOGUE \
|
||||
.text ;\
|
||||
.align 256 ;\
|
||||
.global REALNAME ;\
|
||||
.type REALNAME, %function ;\
|
||||
REALNAME:
|
||||
|
||||
|
||||
#define EPILOGUE
|
||||
|
||||
#define PROFCODE
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#define SEEK_ADDRESS
|
||||
|
||||
#ifndef PAGESIZE
|
||||
#define PAGESIZE ( 4 << 10)
|
||||
#endif
|
||||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#if defined(CORTEXA57)
|
||||
#define BUFFER_SIZE (20 << 20)
|
||||
#else
|
||||
#define BUFFER_SIZE (16 << 20)
|
||||
#endif
|
||||
|
||||
|
||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS MAP_ANON
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
include ../Makefile.rule
|
||||
|
||||
all :: dgemv_tester dgemm_tester
|
||||
|
||||
dgemv_tester :
|
||||
$(CXX) $(COMMON_OPT) -Wall -Wextra -Wshadow -fopenmp -std=c++11 dgemv_thread_safety.cpp ../libopenblas.a -lpthread -o dgemv_tester
|
||||
./dgemv_tester
|
||||
|
||||
dgemm_tester : dgemv_tester
|
||||
$(CXX) $(COMMON_OPT) -Wall -Wextra -Wshadow -fopenmp -std=c++11 dgemm_thread_safety.cpp ../libopenblas.a -lpthread -o dgemm_tester
|
||||
./dgemm_tester
|
||||
|
||||
clean ::
|
||||
rm -f dgemv_tester dgemm_tester
|
||||
@@ -1,55 +0,0 @@
|
||||
/// Portable "press enter to continue": writes a prompt to std::cout, then
/// reads one line from std::cin and discards it.
inline void pauser(){
    std::cout << "Press enter to continue...";
    std::string discardedLine;
    std::getline(std::cin, discardedLine);
}
|
||||
|
||||
/// Fills the first numMat matrices of matBlock with values drawn from rngdist
/// and then copies that group of numMat matrices into every following group,
/// so that each of the numConcurrentThreads groups holds identical data.
///
/// @param matBlock             numConcurrentThreads*numMat matrices; each of the
///                             first numMat must already hold at least
///                             randomMatSize*randomMatSize elements.
/// @param PRNG                 random engine, advanced by this call.
/// @param rngdist              distribution the values are drawn from.
/// @param randomMatSize        dimension N of the square matrices.
/// @param numConcurrentThreads number of groups to populate.
/// @param numMat               number of matrices per group.
///
/// Declared inline, like pauser(), so the definition can be included from
/// more than one translation unit.
inline void FillMatrices(std::vector<std::vector<double>>& matBlock, std::mt19937_64& PRNG, std::uniform_real_distribution<double>& rngdist, const blasint randomMatSize, const uint32_t numConcurrentThreads, const uint32_t numMat){
    // Widen before multiplying: N*N evaluated in blasint can overflow, and
    // truncating it to 32 bits would silently shorten the fill.
    const uint64_t numElements = static_cast<uint64_t>(randomMatSize) * static_cast<uint64_t>(randomMatSize);

    // Random data for the reference group.
    for(uint32_t i = 0; i < numMat; i++){
        std::vector<double>& matrix = matBlock[i];
        for(uint64_t j = 0; j < numElements; j++){
            matrix[j] = rngdist(PRNG);
        }
    }

    // Every other group is an exact copy of the reference group.
    for(uint32_t i = numMat; i < (numConcurrentThreads*numMat); i += numMat){
        for(uint32_t j = 0; j < numMat; j++){
            matBlock[i+j] = matBlock[j];
        }
    }
}
|
||||
|
||||
/// Fills the first numVec vectors of vecBlock with values drawn from rngdist
/// and then copies that group of numVec vectors into every following group,
/// so that each of the numConcurrentThreads groups holds identical data.
///
/// @param vecBlock             numConcurrentThreads*numVec vectors; each of the
///                             first numVec must already hold at least
///                             randomMatSize elements.
/// @param PRNG                 random engine, advanced by this call.
/// @param rngdist              distribution the values are drawn from.
/// @param randomMatSize        length N of every vector.
/// @param numConcurrentThreads number of groups to populate.
/// @param numVec               number of vectors per group.
///
/// Declared inline, like pauser(), so the definition can be included from
/// more than one translation unit.
inline void FillVectors(std::vector<std::vector<double>>& vecBlock, std::mt19937_64& PRNG, std::uniform_real_distribution<double>& rngdist, const blasint randomMatSize, const uint32_t numConcurrentThreads, const uint32_t numVec){
    const uint32_t vecLength = static_cast<uint32_t>(randomMatSize);

    // Random data for the reference group.
    for(uint32_t i = 0; i < numVec; i++){
        std::vector<double>& vec = vecBlock[i];
        for(uint32_t j = 0; j < vecLength; j++){
            vec[j] = rngdist(PRNG);
        }
    }

    // Every other group is an exact copy of the reference group.
    for(uint32_t i = numVec; i < (numConcurrentThreads*numVec); i += numVec){
        for(uint32_t j = 0; j < numVec; j++){
            vecBlock[i+j] = vecBlock[j];
        }
    }
}
|
||||
|
||||
/// Creates a std::mt19937_64 seeded from std::random_device (/dev/urandom or a
/// similar OS-provided RNG) and warms it up before returning it.
///
/// PRNGs often have unreliable distribution uniformity and other statistical
/// properties before their internal state is sufficiently mixed, so 10M values
/// are drawn through a uniform [-1, 1) distribution and discarded first.
///
/// Declared inline, like pauser(), so the definition can be included from
/// more than one translation unit.
inline std::mt19937_64 InitPRNG(){
    std::random_device seedSource;
    std::mt19937_64 PRNG(seedSource());
    std::uniform_real_distribution<double> rngdist{-1.0, 1.0};
    for (uint32_t i = 0; i < 10000000; i++){
        rngdist(PRNG);  // result intentionally discarded: warm-up only
    }
    return PRNG;
}
|
||||
|
||||
/// Debug helper: dumps all numConcurrentThreads*numMat matrices to std::cout.
/// For each matrix it prints the matrix index on its own line, then
/// randomMatSize lines of randomMatSize space-separated values taken in
/// storage order, then a blank line.
///
/// Declared inline, like pauser(), so the definition can be included from
/// more than one translation unit.
inline void PrintMatrices(const std::vector<std::vector<double>>& matBlock, const blasint randomMatSize, const uint32_t numConcurrentThreads, const uint32_t numMat){
    // 64-bit dimension so the flat index j*N + k cannot wrap in 32-bit arithmetic.
    const uint64_t dim = static_cast<uint32_t>(randomMatSize);
    for (uint32_t i = 0; i < numConcurrentThreads*numMat; i++){
        std::cout<<i<<std::endl;
        const std::vector<double>& matrix = matBlock[i];
        for (uint64_t j = 0; j < dim; j++){
            for (uint64_t k = 0; k < dim; k++){
                std::cout<<matrix[j*dim + k]<<" ";
            }
            std::cout<<std::endl;
        }
        std::cout<<std::endl;
    }
}
|
||||
@@ -1,92 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <future>
|
||||
#include <omp.h>
|
||||
#include "../cblas.h"
|
||||
#include "cpp_thread_safety_common.h"
|
||||
|
||||
void launch_cblas_dgemm(double* A, double* B, double* C, const blasint randomMatSize){
|
||||
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, randomMatSize, randomMatSize, randomMatSize, 1.0, A, randomMatSize, B, randomMatSize, 0.1, C, randomMatSize);
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]){
|
||||
blasint randomMatSize = 1024; //dimension of the random square matrices used
|
||||
uint32_t numConcurrentThreads = 52; //number of concurrent calls of the functions being tested
|
||||
uint32_t numTestRounds = 16; //number of testing rounds before success exit
|
||||
|
||||
if (argc > 4){
|
||||
std::cout<<"ERROR: too many arguments for thread safety tester"<<std::endl;
|
||||
abort();
|
||||
}
|
||||
|
||||
if(argc == 4){
|
||||
std::vector<std::string> cliArgs;
|
||||
for (int i = 1; i < argc; i++){
|
||||
cliArgs.push_back(argv[i]);
|
||||
std::cout<<argv[i]<<std::endl;
|
||||
}
|
||||
randomMatSize = std::stoul(cliArgs[0]);
|
||||
numConcurrentThreads = std::stoul(cliArgs[1]);
|
||||
numTestRounds = std::stoul(cliArgs[2]);
|
||||
}
|
||||
|
||||
std::uniform_real_distribution<double> rngdist{-1.0, 1.0};
|
||||
std::vector<std::vector<double>> matBlock(numConcurrentThreads*3);
|
||||
std::vector<std::future<void>> futureBlock(numConcurrentThreads);
|
||||
|
||||
std::cout<<"*----------------------------*\n";
|
||||
std::cout<<"| DGEMM thread safety tester |\n";
|
||||
std::cout<<"*----------------------------*\n";
|
||||
std::cout<<"Size of random matrices(N=M=K): "<<randomMatSize<<'\n';
|
||||
std::cout<<"Number of concurrent calls into OpenBLAS : "<<numConcurrentThreads<<'\n';
|
||||
std::cout<<"Number of testing rounds : "<<numTestRounds<<'\n';
|
||||
std::cout<<"This test will need "<<(static_cast<uint64_t>(randomMatSize*randomMatSize)*numConcurrentThreads*3*8)/static_cast<double>(1024*1024)<<" MiB of RAM\n"<<std::endl;
|
||||
|
||||
std::cout<<"Initializing random number generator..."<<std::flush;
|
||||
std::mt19937_64 PRNG = InitPRNG();
|
||||
std::cout<<"done\n";
|
||||
|
||||
std::cout<<"Preparing to test CBLAS DGEMM thread safety\n";
|
||||
std::cout<<"Allocating matrices..."<<std::flush;
|
||||
for(uint32_t i=0; i<(numConcurrentThreads*3); i++){
|
||||
matBlock[i].resize(randomMatSize*randomMatSize);
|
||||
}
|
||||
std::cout<<"done\n";
|
||||
//pauser();
|
||||
std::cout<<"Filling matrices with random numbers..."<<std::flush;
|
||||
FillMatrices(matBlock, PRNG, rngdist, randomMatSize, numConcurrentThreads, 3);
|
||||
//PrintMatrices(matBlock, randomMatSize, numConcurrentThreads, 3);
|
||||
std::cout<<"done\n";
|
||||
std::cout<<"Testing CBLAS DGEMM thread safety\n";
|
||||
omp_set_num_threads(numConcurrentThreads);
|
||||
for(uint32_t R=0; R<numTestRounds; R++){
|
||||
std::cout<<"DGEMM round #"<<R<<std::endl;
|
||||
std::cout<<"Launching "<<numConcurrentThreads<<" threads simultaneously using OpenMP..."<<std::flush;
|
||||
#pragma omp parallel for default(none) shared(futureBlock, matBlock, randomMatSize, numConcurrentThreads)
|
||||
for(uint32_t i=0; i<numConcurrentThreads; i++){
|
||||
futureBlock[i] = std::async(std::launch::async, launch_cblas_dgemm, &matBlock[i*3][0], &matBlock[i*3+1][0], &matBlock[i*3+2][0], randomMatSize);
|
||||
//launch_cblas_dgemm( &matBlock[i][0], &matBlock[i+1][0], &matBlock[i+2][0]);
|
||||
}
|
||||
std::cout<<"done\n";
|
||||
std::cout<<"Waiting for threads to finish..."<<std::flush;
|
||||
for(uint32_t i=0; i<numConcurrentThreads; i++){
|
||||
futureBlock[i].get();
|
||||
}
|
||||
std::cout<<"done\n";
|
||||
//PrintMatrices(matBlock, randomMatSize, numConcurrentThreads, 3);
|
||||
std::cout<<"Comparing results from different threads..."<<std::flush;
|
||||
for(uint32_t i=3; i<(numConcurrentThreads*3); i+=3){ //i is the index of matrix A, for a given thread
|
||||
for(uint32_t j = 0; j < static_cast<uint32_t>(randomMatSize*randomMatSize); j++){
|
||||
if (std::abs(matBlock[i+2][j] - matBlock[2][j]) > 1.0E-13){ //i+2 is the index of matrix C, for a given thread
|
||||
std::cout<<"ERROR: one of the threads returned a different result! Index : "<<i+2<<std::endl;
|
||||
std::cout<<"CBLAS DGEMM thread safety test FAILED!"<<std::endl;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout<<"OK!\n"<<std::endl;
|
||||
}
|
||||
std::cout<<"CBLAS DGEMM thread safety test PASSED!\n"<<std::endl;
|
||||
return 0;
|
||||
}
|
||||
@@ -1,101 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <future>
|
||||
#include <omp.h>
|
||||
#include "../cblas.h"
|
||||
#include "cpp_thread_safety_common.h"
|
||||
|
||||
void launch_cblas_dgemv(double* A, double* x, double* y, const blasint randomMatSize){
|
||||
const blasint inc = 1;
|
||||
cblas_dgemv(CblasColMajor, CblasNoTrans, randomMatSize, randomMatSize, 1.0, A, randomMatSize, x, inc, 0.1, y, inc);
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]){
|
||||
blasint randomMatSize = 1024; //dimension of the random square matrices and vectors being used
|
||||
uint32_t numConcurrentThreads = 52; //number of concurrent calls of the functions being tested
|
||||
uint32_t numTestRounds = 16; //number of testing rounds before success exit
|
||||
|
||||
if (argc > 4){
|
||||
std::cout<<"ERROR: too many arguments for thread safety tester"<<std::endl;
|
||||
abort();
|
||||
}
|
||||
if(argc == 4){
|
||||
std::vector<std::string> cliArgs;
|
||||
for (int i = 1; i < argc; i++){
|
||||
cliArgs.push_back(argv[i]);
|
||||
std::cout<<argv[i]<<std::endl;
|
||||
}
|
||||
randomMatSize = std::stoul(cliArgs.at(0));
|
||||
numConcurrentThreads = std::stoul(cliArgs.at(1));
|
||||
numTestRounds = std::stoul(cliArgs.at(2));
|
||||
}
|
||||
|
||||
std::uniform_real_distribution<double> rngdist{-1.0, 1.0};
|
||||
std::vector<std::vector<double>> matBlock(numConcurrentThreads);
|
||||
std::vector<std::vector<double>> vecBlock(numConcurrentThreads*2);
|
||||
std::vector<std::future<void>> futureBlock(numConcurrentThreads);
|
||||
|
||||
std::cout<<"*----------------------------*\n";
|
||||
std::cout<<"| DGEMV thread safety tester |\n";
|
||||
std::cout<<"*----------------------------*\n";
|
||||
std::cout<<"Size of random matrices and vectors(N=M): "<<randomMatSize<<'\n';
|
||||
std::cout<<"Number of concurrent calls into OpenBLAS : "<<numConcurrentThreads<<'\n';
|
||||
std::cout<<"Number of testing rounds : "<<numTestRounds<<'\n';
|
||||
std::cout<<"This test will need "<<((static_cast<uint64_t>(randomMatSize*randomMatSize)*numConcurrentThreads*8)+(static_cast<uint64_t>(randomMatSize)*numConcurrentThreads*8*2))/static_cast<double>(1024*1024)<<" MiB of RAM\n"<<std::endl;
|
||||
|
||||
std::cout<<"Initializing random number generator..."<<std::flush;
|
||||
std::mt19937_64 PRNG = InitPRNG();
|
||||
std::cout<<"done\n";
|
||||
|
||||
std::cout<<"Preparing to test CBLAS DGEMV thread safety\n";
|
||||
std::cout<<"Allocating matrices..."<<std::flush;
|
||||
for(uint32_t i=0; i<numConcurrentThreads; i++){
|
||||
matBlock.at(i).resize(randomMatSize*randomMatSize);
|
||||
}
|
||||
std::cout<<"done\n";
|
||||
std::cout<<"Allocating vectors..."<<std::flush;
|
||||
for(uint32_t i=0; i<(numConcurrentThreads*2); i++){
|
||||
vecBlock.at(i).resize(randomMatSize);
|
||||
}
|
||||
std::cout<<"done\n";
|
||||
//pauser();
|
||||
|
||||
std::cout<<"Filling matrices with random numbers..."<<std::flush;
|
||||
FillMatrices(matBlock, PRNG, rngdist, randomMatSize, numConcurrentThreads, 1);
|
||||
//PrintMatrices(matBlock, randomMatSize, numConcurrentThreads);
|
||||
std::cout<<"done\n";
|
||||
std::cout<<"Filling vectors with random numbers..."<<std::flush;
|
||||
FillVectors(vecBlock, PRNG, rngdist, randomMatSize, numConcurrentThreads, 2);
|
||||
std::cout<<"done\n";
|
||||
|
||||
std::cout<<"Testing CBLAS DGEMV thread safety"<<std::endl;
|
||||
omp_set_num_threads(numConcurrentThreads);
|
||||
for(uint32_t R=0; R<numTestRounds; R++){
|
||||
std::cout<<"DGEMV round #"<<R<<std::endl;
|
||||
std::cout<<"Launching "<<numConcurrentThreads<<" threads simultaneously using OpenMP..."<<std::flush;
|
||||
#pragma omp parallel for default(none) shared(futureBlock, matBlock, vecBlock, randomMatSize, numConcurrentThreads)
|
||||
for(uint32_t i=0; i<numConcurrentThreads; i++){
|
||||
futureBlock[i] = std::async(std::launch::async, launch_cblas_dgemv, &matBlock[i][0], &vecBlock[i*2][0], &vecBlock[i*2+1][0], randomMatSize);
|
||||
}
|
||||
std::cout<<"done\n";
|
||||
std::cout<<"Waiting for threads to finish..."<<std::flush;
|
||||
for(uint32_t i=0; i<numConcurrentThreads; i++){
|
||||
futureBlock[i].get();
|
||||
}
|
||||
std::cout<<"done\n";
|
||||
std::cout<<"Comparing results from different threads..."<<std::flush;
|
||||
for(uint32_t i=2; i<(numConcurrentThreads*2); i+=2){ //i is the index of vector x, for a given thread
|
||||
for(uint32_t j = 0; j < static_cast<uint32_t>(randomMatSize); j++){
|
||||
if (std::abs(vecBlock[i+1][j] - vecBlock[1][j]) > 1.0E-13){ //i+1 is the index of vector y, for a given thread
|
||||
std::cout<<"ERROR: one of the threads returned a different result! Index : "<<i+1<<std::endl;
|
||||
std::cout<<"CBLAS DGEMV thread safety test FAILED!"<<std::endl;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout<<"OK!\n"<<std::endl;
|
||||
}
|
||||
std::cout<<"CBLAS DGEMV thread safety test PASSED!\n"<<std::endl;
|
||||
return 0;
|
||||
}
|
||||
11
cpuid.h
11
cpuid.h
@@ -53,7 +53,6 @@
|
||||
#define VENDOR_SIS 8
|
||||
#define VENDOR_TRANSMETA 9
|
||||
#define VENDOR_NSC 10
|
||||
#define VENDOR_HYGON 11
|
||||
#define VENDOR_UNKNOWN 99
|
||||
|
||||
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
|
||||
@@ -115,9 +114,6 @@
|
||||
#define CORE_HASWELL 24
|
||||
#define CORE_STEAMROLLER 25
|
||||
#define CORE_EXCAVATOR 26
|
||||
#define CORE_ZEN 27
|
||||
#define CORE_SKYLAKEX 28
|
||||
#define CORE_DHYANA 29
|
||||
|
||||
#define HAVE_SSE (1 << 0)
|
||||
#define HAVE_SSE2 (1 << 1)
|
||||
@@ -140,8 +136,6 @@
|
||||
#define HAVE_AVX (1 << 18)
|
||||
#define HAVE_FMA4 (1 << 19)
|
||||
#define HAVE_FMA3 (1 << 20)
|
||||
#define HAVE_AVX512VL (1 << 21)
|
||||
#define HAVE_AVX2 (1 << 22)
|
||||
|
||||
#define CACHE_INFO_L1_I 1
|
||||
#define CACHE_INFO_L1_D 2
|
||||
@@ -215,10 +209,5 @@ typedef struct {
|
||||
#define CPUTYPE_HASWELL 48
|
||||
#define CPUTYPE_STEAMROLLER 49
|
||||
#define CPUTYPE_EXCAVATOR 50
|
||||
#define CPUTYPE_ZEN 51
|
||||
#define CPUTYPE_SKYLAKEX 52
|
||||
#define CPUTYPE_DHYANA 53
|
||||
|
||||
#define CPUTYPE_HYGON_UNKNOWN 54
|
||||
|
||||
#endif
|
||||
|
||||
28
cpuid_arm.c
28
cpuid_arm.c
@@ -34,7 +34,7 @@
|
||||
#define CPU_CORTEXA15 4
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
"UNKOWN",
|
||||
"ARMV6",
|
||||
"ARMV7",
|
||||
"CORTEXA9",
|
||||
@@ -74,7 +74,7 @@ int get_feature(char *search)
|
||||
fclose(infile);
|
||||
|
||||
|
||||
if( p == NULL ) return 0;
|
||||
if( p == NULL ) return;
|
||||
|
||||
t = strtok(p," ");
|
||||
while( t = strtok(NULL," "))
|
||||
@@ -115,9 +115,6 @@ int detect(void)
|
||||
if (strstr(p, "0xc0f")) {
|
||||
return CPU_CORTEXA15;
|
||||
}
|
||||
if (strstr(p, "0xd07")) {
|
||||
return CPU_ARMV7; //ARMV8 on 32-bit
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -161,27 +158,6 @@ int detect(void)
|
||||
|
||||
|
||||
}
|
||||
|
||||
p = (char *) NULL ;
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if ((!strncmp("CPU architecture", buffer, 16)))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(infile);
|
||||
if(p != NULL) {
|
||||
if (strstr(p, "8")) {
|
||||
return CPU_ARMV7; //ARMV8 on 32-bit
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
return CPU_UNKNOWN;
|
||||
|
||||
184
cpuid_arm64.c
184
cpuid_arm64.c
@@ -29,43 +29,18 @@
|
||||
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_ARMV8 1
|
||||
// Arm
|
||||
#define CPU_CORTEXA53 2
|
||||
#define CPU_CORTEXA57 3
|
||||
#define CPU_CORTEXA72 4
|
||||
#define CPU_CORTEXA73 5
|
||||
// Qualcomm
|
||||
#define CPU_FALKOR 6
|
||||
// Cavium
|
||||
#define CPU_THUNDERX 7
|
||||
#define CPU_THUNDERX2T99 8
|
||||
//Hisilicon
|
||||
#define CPU_TSV110 9
|
||||
#define CPU_CORTEXA57 2
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
"ARMV8" ,
|
||||
"CORTEXA53",
|
||||
"CORTEXA57",
|
||||
"CORTEXA72",
|
||||
"CORTEXA73",
|
||||
"FALKOR",
|
||||
"THUNDERX",
|
||||
"THUNDERX2T99",
|
||||
"TSV110"
|
||||
"CORTEXA57"
|
||||
};
|
||||
|
||||
static char *cpuname_lower[] = {
|
||||
"unknown",
|
||||
"armv8",
|
||||
"cortexa53",
|
||||
"cortexa57",
|
||||
"cortexa72",
|
||||
"cortexa73",
|
||||
"falkor",
|
||||
"thunderx",
|
||||
"thunderx2t99",
|
||||
"tsv110"
|
||||
"armv8" ,
|
||||
"cortexa57"
|
||||
};
|
||||
|
||||
int get_feature(char *search)
|
||||
@@ -94,7 +69,7 @@ int get_feature(char *search)
|
||||
if( p == NULL ) return 0;
|
||||
|
||||
t = strtok(p," ");
|
||||
while( (t = strtok(NULL," ")))
|
||||
while( t = strtok(NULL," "))
|
||||
{
|
||||
if (!strcmp(t, search)) { return(1); }
|
||||
}
|
||||
@@ -110,48 +85,25 @@ int detect(void)
|
||||
#ifdef linux
|
||||
|
||||
FILE *infile;
|
||||
char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL;
|
||||
p = (char *) NULL ;
|
||||
char buffer[512], *p;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
while (fgets(buffer, sizeof(buffer), infile)) {
|
||||
if ((cpu_part != NULL) && (cpu_implementer != NULL)) {
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("CPU part", buffer, 8))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
|
||||
if ((cpu_part == NULL) && !strncmp("CPU part", buffer, 8)) {
|
||||
cpu_part = strchr(buffer, ':') + 2;
|
||||
cpu_part = strdup(cpu_part);
|
||||
} else if ((cpu_implementer == NULL) && !strncmp("CPU implementer", buffer, 15)) {
|
||||
cpu_implementer = strchr(buffer, ':') + 2;
|
||||
cpu_implementer = strdup(cpu_implementer);
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
if(cpu_part != NULL && cpu_implementer != NULL) {
|
||||
// Arm
|
||||
if (strstr(cpu_implementer, "0x41")) {
|
||||
if (strstr(cpu_part, "0xd03"))
|
||||
return CPU_CORTEXA53;
|
||||
else if (strstr(cpu_part, "0xd07"))
|
||||
return CPU_CORTEXA57;
|
||||
else if (strstr(cpu_part, "0xd08"))
|
||||
return CPU_CORTEXA72;
|
||||
else if (strstr(cpu_part, "0xd09"))
|
||||
return CPU_CORTEXA73;
|
||||
}
|
||||
// Qualcomm
|
||||
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
|
||||
return CPU_FALKOR;
|
||||
// Cavium
|
||||
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0a1"))
|
||||
return CPU_THUNDERX;
|
||||
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af"))
|
||||
return CPU_THUNDERX2T99;
|
||||
// HiSilicon
|
||||
else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01"))
|
||||
return CPU_TSV110;
|
||||
if(p != NULL) {
|
||||
if (strstr(p, "0xd07")) {
|
||||
return CPU_CORTEXA57;
|
||||
}
|
||||
}
|
||||
|
||||
p = (char *) NULL ;
|
||||
@@ -172,7 +124,7 @@ int detect(void)
|
||||
if(p != NULL)
|
||||
{
|
||||
|
||||
if ((strstr(p, "AArch64")) || (strstr(p, "8")))
|
||||
if (strstr(p, "AArch64"))
|
||||
{
|
||||
return CPU_ARMV8;
|
||||
|
||||
@@ -209,105 +161,37 @@ void get_subdirname(void)
|
||||
void get_cpuconfig(void)
|
||||
{
|
||||
|
||||
// All arches should define ARMv8
|
||||
printf("#define ARMV8\n");
|
||||
printf("#define HAVE_NEON\n"); // This shouldn't be necessary
|
||||
printf("#define HAVE_VFPV4\n"); // This shouldn't be necessary
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_CORTEXA53:
|
||||
printf("#define %s\n", cpuname[d]);
|
||||
// Fall-through
|
||||
case CPU_ARMV8:
|
||||
// Minimum parameters for ARMv8 (based on A53)
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 262144\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
printf("#define ARMV8\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 262144\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
case CPU_CORTEXA57:
|
||||
case CPU_CORTEXA72:
|
||||
case CPU_CORTEXA73:
|
||||
// Common minimum settings for these Arm cores
|
||||
// Can change a lot, but we need to be conservative
|
||||
// TODO: detect info from /sys if possible
|
||||
printf("#define %s\n", cpuname[d]);
|
||||
printf("#define CORTEXA57\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define HAVE_VFPV3\n");
|
||||
printf("#define HAVE_NEON\n");
|
||||
printf("#define HAVE_VFPV4\n");
|
||||
printf("#define L1_CODE_SIZE 49152\n");
|
||||
printf("#define L1_CODE_LINESIZE 64\n");
|
||||
printf("#define L1_CODE_ASSOCIATIVE 3\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L1_DATA_ASSOCIATIVE 2\n");
|
||||
printf("#define L2_SIZE 524288\n");
|
||||
printf("#define L2_SIZE 2097152\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define L2_ASSOCIATIVE 16\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
break;
|
||||
|
||||
case CPU_FALKOR:
|
||||
printf("#define FALKOR\n");
|
||||
printf("#define L1_CODE_SIZE 65536\n");
|
||||
printf("#define L1_CODE_LINESIZE 64\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 128\n");
|
||||
printf("#define L2_SIZE 524288\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 16\n");
|
||||
break;
|
||||
|
||||
case CPU_THUNDERX:
|
||||
printf("#define THUNDERX\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 128\n");
|
||||
printf("#define L2_SIZE 16777216\n");
|
||||
printf("#define L2_LINESIZE 128\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 16\n");
|
||||
break;
|
||||
|
||||
case CPU_THUNDERX2T99:
|
||||
printf("#define THUNDERX2T99 \n");
|
||||
printf("#define L1_CODE_SIZE 32768 \n");
|
||||
printf("#define L1_CODE_LINESIZE 64 \n");
|
||||
printf("#define L1_CODE_ASSOCIATIVE 8 \n");
|
||||
printf("#define L1_DATA_SIZE 32768 \n");
|
||||
printf("#define L1_DATA_LINESIZE 64 \n");
|
||||
printf("#define L1_DATA_ASSOCIATIVE 8 \n");
|
||||
printf("#define L2_SIZE 262144 \n");
|
||||
printf("#define L2_LINESIZE 64 \n");
|
||||
printf("#define L2_ASSOCIATIVE 8 \n");
|
||||
printf("#define L3_SIZE 33554432 \n");
|
||||
printf("#define L3_LINESIZE 64 \n");
|
||||
printf("#define L3_ASSOCIATIVE 32 \n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
|
||||
printf("#define DTB_SIZE 4096 \n");
|
||||
break;
|
||||
|
||||
case CPU_TSV110:
|
||||
printf("#define TSV110 \n");
|
||||
printf("#define L1_CODE_SIZE 65536 \n");
|
||||
printf("#define L1_CODE_LINESIZE 64 \n");
|
||||
printf("#define L1_CODE_ASSOCIATIVE 4 \n");
|
||||
printf("#define L1_DATA_SIZE 65536 \n");
|
||||
printf("#define L1_DATA_LINESIZE 64 \n");
|
||||
printf("#define L1_DATA_ASSOCIATIVE 4 \n");
|
||||
printf("#define L2_SIZE 524228 \n");
|
||||
printf("#define L2_LINESIZE 64 \n");
|
||||
printf("#define L2_ASSOCIATIVE 8 \n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
|
||||
printf("#define DTB_SIZE 4096 \n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -344,7 +228,7 @@ void get_features(void)
|
||||
if( p == NULL ) return;
|
||||
|
||||
t = strtok(p," ");
|
||||
while( (t = strtok(NULL," ")))
|
||||
while( t = strtok(NULL," "))
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
110
cpuid_mips.c
110
cpuid_mips.c
@@ -71,13 +71,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
/*********************************************************************/
|
||||
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_P5600 1
|
||||
#define CPU_1004K 2
|
||||
#define CPU_SICORTEX 1
|
||||
#define CPU_LOONGSON3A 2
|
||||
#define CPU_LOONGSON3B 3
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
"P5600",
|
||||
"1004K"
|
||||
"UNKOWN",
|
||||
"SICORTEX",
|
||||
"LOONGSON3A",
|
||||
"LOONGSON3B"
|
||||
};
|
||||
|
||||
int detect(void){
|
||||
@@ -92,7 +94,7 @@ int detect(void){
|
||||
if (!strncmp("cpu", buffer, 3)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
#if 0
|
||||
fprintf(stderr, "%s \n", p);
|
||||
fprintf(stderr, "%s\n", p);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
@@ -101,12 +103,42 @@ int detect(void){
|
||||
fclose(infile);
|
||||
|
||||
if(p != NULL){
|
||||
if (strstr(p, "5600")) {
|
||||
return CPU_P5600;
|
||||
} else if (strstr(p, "1004K")) {
|
||||
return CPU_1004K;
|
||||
} else
|
||||
return CPU_UNKNOWN;
|
||||
if (strstr(p, "Loongson-3A")){
|
||||
return CPU_LOONGSON3A;
|
||||
}else if(strstr(p, "Loongson-3B")){
|
||||
return CPU_LOONGSON3B;
|
||||
}else if (strstr(p, "Loongson-3")){
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
p = (char *)NULL;
|
||||
while (fgets(buffer, sizeof(buffer), infile)){
|
||||
if (!strncmp("system type", buffer, 11)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(infile);
|
||||
if (strstr(p, "loongson3a"))
|
||||
return CPU_LOONGSON3A;
|
||||
}else{
|
||||
return CPU_SICORTEX;
|
||||
}
|
||||
}
|
||||
//Check model name for Loongson3
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
p = (char *)NULL;
|
||||
while (fgets(buffer, sizeof(buffer), infile)){
|
||||
if (!strncmp("model name", buffer, 10)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(infile);
|
||||
if(p != NULL){
|
||||
if (strstr(p, "Loongson-3A")){
|
||||
return CPU_LOONGSON3A;
|
||||
}else if(strstr(p, "Loongson-3B")){
|
||||
return CPU_LOONGSON3B;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return CPU_UNKNOWN;
|
||||
@@ -117,50 +149,64 @@ char *get_corename(void){
|
||||
}
|
||||
|
||||
void get_architecture(void){
|
||||
printf("MIPS");
|
||||
printf("MIPS64");
|
||||
}
|
||||
|
||||
void get_subarchitecture(void){
|
||||
if(detect()==CPU_P5600|| detect()==CPU_1004K){
|
||||
printf("P5600");
|
||||
if(detect()==CPU_LOONGSON3A) {
|
||||
printf("LOONGSON3A");
|
||||
}else if(detect()==CPU_LOONGSON3B){
|
||||
printf("LOONGSON3B");
|
||||
}else{
|
||||
printf("UNKNOWN");
|
||||
printf("SICORTEX");
|
||||
}
|
||||
}
|
||||
|
||||
void get_subdirname(void){
|
||||
printf("mips");
|
||||
printf("mips64");
|
||||
}
|
||||
|
||||
void get_cpuconfig(void){
|
||||
if(detect()==CPU_P5600){
|
||||
printf("#define P5600\n");
|
||||
if(detect()==CPU_LOONGSON3A) {
|
||||
printf("#define LOONGSON3A\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
} else if (detect()==CPU_1004K) {
|
||||
printf("#define MIPS1004K\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
}else if(detect()==CPU_LOONGSON3B){
|
||||
printf("#define LOONGSON3B\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 26144\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 8\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
}else{
|
||||
printf("#define UNKNOWN\n");
|
||||
printf("#define SICORTEX\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
}
|
||||
}
|
||||
|
||||
void get_libname(void){
|
||||
if(detect()==CPU_P5600) {
|
||||
printf("p5600\n");
|
||||
} else if (detect()==CPU_1004K) {
|
||||
printf("1004K\n");
|
||||
if(detect()==CPU_LOONGSON3A) {
|
||||
printf("loongson3a\n");
|
||||
}else if(detect()==CPU_LOONGSON3B) {
|
||||
printf("loongson3b\n");
|
||||
}else{
|
||||
printf("mips\n");
|
||||
#ifdef __mips64
|
||||
printf("mips64\n");
|
||||
#else
|
||||
printf("mips32\n");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
253
cpuid_mips64.c
253
cpuid_mips64.c
@@ -1,253 +0,0 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_SICORTEX 1
|
||||
#define CPU_LOONGSON3A 2
|
||||
#define CPU_LOONGSON3B 3
|
||||
#define CPU_I6400 4
|
||||
#define CPU_P6600 5
|
||||
#define CPU_I6500 6
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
"SICORTEX",
|
||||
"LOONGSON3A",
|
||||
"LOONGSON3B",
|
||||
"I6400",
|
||||
"P6600",
|
||||
"I6500"
|
||||
};
|
||||
|
||||
int detect(void){
|
||||
|
||||
#ifdef linux
|
||||
FILE *infile;
|
||||
char buffer[512], *p;
|
||||
|
||||
p = (char *)NULL;
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
while (fgets(buffer, sizeof(buffer), infile)){
|
||||
if (!strncmp("cpu", buffer, 3)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
#if 0
|
||||
fprintf(stderr, "%s\n", p);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
if(p != NULL){
|
||||
if (strstr(p, "Loongson-3A")){
|
||||
return CPU_LOONGSON3A;
|
||||
}else if(strstr(p, "Loongson-3B")){
|
||||
return CPU_LOONGSON3B;
|
||||
}else if (strstr(p, "Loongson-3")){
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
p = (char *)NULL;
|
||||
while (fgets(buffer, sizeof(buffer), infile)){
|
||||
if (!strncmp("system type", buffer, 11)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(infile);
|
||||
if (strstr(p, "loongson3a"))
|
||||
return CPU_LOONGSON3A;
|
||||
}else{
|
||||
return CPU_SICORTEX;
|
||||
}
|
||||
}
|
||||
//Check model name for Loongson3
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
p = (char *)NULL;
|
||||
while (fgets(buffer, sizeof(buffer), infile)){
|
||||
if (!strncmp("model name", buffer, 10)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(infile);
|
||||
if(p != NULL){
|
||||
if (strstr(p, "Loongson-3A")){
|
||||
return CPU_LOONGSON3A;
|
||||
}else if(strstr(p, "Loongson-3B")){
|
||||
return CPU_LOONGSON3B;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
|
||||
char *get_corename(void){
|
||||
return cpuname[detect()];
|
||||
}
|
||||
|
||||
void get_architecture(void){
|
||||
printf("MIPS64");
|
||||
}
|
||||
|
||||
void get_subarchitecture(void){
|
||||
if(detect()==CPU_LOONGSON3A) {
|
||||
printf("LOONGSON3A");
|
||||
}else if(detect()==CPU_LOONGSON3B){
|
||||
printf("LOONGSON3B");
|
||||
}else if(detect()==CPU_I6400){
|
||||
printf("I6400");
|
||||
}else if(detect()==CPU_P6600){
|
||||
printf("P6600");
|
||||
}else if(detect()==CPU_I6500){
|
||||
printf("I6500");
|
||||
}else{
|
||||
printf("SICORTEX");
|
||||
}
|
||||
}
|
||||
|
||||
void get_subdirname(void){
|
||||
printf("mips64");
|
||||
}
|
||||
|
||||
void get_cpuconfig(void){
|
||||
if(detect()==CPU_LOONGSON3A) {
|
||||
printf("#define LOONGSON3A\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
}else if(detect()==CPU_LOONGSON3B){
|
||||
printf("#define LOONGSON3B\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
}else if(detect()==CPU_I6400){
|
||||
printf("#define I6400\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
}else if(detect()==CPU_P6600){
|
||||
printf("#define P6600\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
}else if(detect()==CPU_I6500){
|
||||
printf("#define I6500\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
}else{
|
||||
printf("#define SICORTEX\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 512488\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
}
|
||||
}
|
||||
|
||||
void get_libname(void){
|
||||
if(detect()==CPU_LOONGSON3A) {
|
||||
printf("loongson3a\n");
|
||||
}else if(detect()==CPU_LOONGSON3B) {
|
||||
printf("loongson3b\n");
|
||||
}else if(detect()==CPU_I6400) {
|
||||
printf("i6400\n");
|
||||
}else if(detect()==CPU_P6600) {
|
||||
printf("p6600\n");
|
||||
}else if(detect()==CPU_I6500) {
|
||||
printf("i6500\n");
|
||||
}else{
|
||||
printf("mips64\n");
|
||||
}
|
||||
}
|
||||
@@ -55,8 +55,6 @@
|
||||
#define CPUTYPE_POWER6 5
|
||||
#define CPUTYPE_CELL 6
|
||||
#define CPUTYPE_PPCG4 7
|
||||
#define CPUTYPE_POWER8 8
|
||||
#define CPUTYPE_POWER9 9
|
||||
|
||||
char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
@@ -67,8 +65,6 @@ char *cpuname[] = {
|
||||
"POWER6",
|
||||
"CELL",
|
||||
"PPCG4",
|
||||
"POWER8",
|
||||
"POWER9"
|
||||
};
|
||||
|
||||
char *lowercpuname[] = {
|
||||
@@ -80,8 +76,6 @@ char *lowercpuname[] = {
|
||||
"power6",
|
||||
"cell",
|
||||
"ppcg4",
|
||||
"power8",
|
||||
"power9"
|
||||
};
|
||||
|
||||
char *corename[] = {
|
||||
@@ -93,8 +87,6 @@ char *corename[] = {
|
||||
"POWER6",
|
||||
"CELL",
|
||||
"PPCG4",
|
||||
"POWER8",
|
||||
"POWER9"
|
||||
};
|
||||
|
||||
int detect(void){
|
||||
@@ -123,8 +115,7 @@ int detect(void){
|
||||
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
|
||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
|
||||
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9;
|
||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||
|
||||
@@ -132,33 +123,6 @@ int detect(void){
|
||||
#endif
|
||||
|
||||
#ifdef _AIX
|
||||
FILE *infile;
|
||||
char buffer[512], *p;
|
||||
|
||||
p = (char *)NULL;
|
||||
infile = popen("prtconf|grep 'Processor Type'", "r");
|
||||
while (fgets(buffer, sizeof(buffer), infile)){
|
||||
if (!strncmp("Pro", buffer, 3)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
#if 0
|
||||
fprintf(stderr, "%s\n", p);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
pclose(infile);
|
||||
|
||||
if (!strncasecmp(p, "POWER3", 6)) return CPUTYPE_POWER3;
|
||||
if (!strncasecmp(p, "POWER4", 6)) return CPUTYPE_POWER4;
|
||||
if (!strncasecmp(p, "PPC970", 6)) return CPUTYPE_PPC970;
|
||||
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
|
||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
|
||||
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9;
|
||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||
return CPUTYPE_POWER5;
|
||||
#endif
|
||||
|
||||
@@ -174,52 +138,6 @@ int detect(void){
|
||||
|
||||
return CPUTYPE_PPC970;
|
||||
#endif
|
||||
|
||||
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
|
||||
int id;
|
||||
__asm __volatile("mfpvr %0" : "=r"(id));
|
||||
switch ( id >> 16 ) {
|
||||
case 0x4e: // POWER9
|
||||
return CPUTYPE_POWER9;
|
||||
break;
|
||||
case 0x4d:
|
||||
case 0x4b: // POWER8/8E
|
||||
return CPUTYPE_POWER8;
|
||||
break;
|
||||
case 0x4a:
|
||||
case 0x3f: // POWER7/7E
|
||||
return CPUTYPE_POWER6;
|
||||
break;
|
||||
case 0x3e:
|
||||
return CPUTYPE_POWER6;
|
||||
break;
|
||||
case 0x3a:
|
||||
return CPUTYPE_POWER5;
|
||||
break;
|
||||
case 0x35:
|
||||
case 0x38: // POWER4 /4+
|
||||
return CPUTYPE_POWER4;
|
||||
break;
|
||||
case 0x40:
|
||||
case 0x41: // POWER3 /3+
|
||||
return CPUTYPE_POWER3;
|
||||
break;
|
||||
case 0x39:
|
||||
case 0x3c:
|
||||
case 0x44:
|
||||
case 0x45:
|
||||
return CPUTYPE_PPC970;
|
||||
break;
|
||||
case 0x70:
|
||||
return CPUTYPE_CELL;
|
||||
break;
|
||||
case 0x8003:
|
||||
return CPUTYPE_PPCG4;
|
||||
break;
|
||||
default:
|
||||
return CPUTYPE_UNKNOWN;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void get_architecture(void){
|
||||
|
||||
@@ -49,7 +49,6 @@ void get_subdirname(void){
|
||||
}
|
||||
|
||||
void get_cpuconfig(void){
|
||||
printf("#define SPARC\n");
|
||||
printf("#define V9\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||
}
|
||||
@@ -57,8 +56,3 @@ void get_cpuconfig(void){
|
||||
void get_libname(void){
|
||||
printf("v9\n");
|
||||
}
|
||||
|
||||
char *get_corename(void){
|
||||
return "sparc";
|
||||
}
|
||||
|
||||
|
||||
434
cpuid_x86.c
434
cpuid_x86.c
@@ -50,8 +50,6 @@
|
||||
#ifdef NO_AVX
|
||||
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
|
||||
#define CORE_HASWELL CORE_NEHALEM
|
||||
#define CPUTYPE_SKYLAKEX CPUTYPE_NEHALEM
|
||||
#define CORE_SKYLAKEX CORE_NEHALEM
|
||||
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
|
||||
#define CORE_SANDYBRIDGE CORE_NEHALEM
|
||||
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
|
||||
@@ -73,23 +71,12 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
|
||||
*edx = cpuInfo[3];
|
||||
}
|
||||
|
||||
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx)
|
||||
{
|
||||
int cpuInfo[4] = {-1};
|
||||
__cpuidex(cpuInfo, op, count);
|
||||
*eax = cpuInfo[0];
|
||||
*ebx = cpuInfo[1];
|
||||
*ecx = cpuInfo[2];
|
||||
*edx = cpuInfo[3];
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#ifndef CPUIDEMU
|
||||
|
||||
#if defined(__APPLE__) && defined(__i386__)
|
||||
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
|
||||
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx);
|
||||
#else
|
||||
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
@@ -97,23 +84,10 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||
("mov %%ebx, %%edi;"
|
||||
"cpuid;"
|
||||
"xchgl %%ebx, %%edi;"
|
||||
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op), "c" (0) : "cc");
|
||||
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
|
||||
#else
|
||||
__asm__ __volatile__
|
||||
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) , "c" (0) : "cc");
|
||||
#endif
|
||||
}
|
||||
|
||||
static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
__asm__ __volatile__
|
||||
("mov %%ebx, %%edi;"
|
||||
"cpuid;"
|
||||
"xchgl %%ebx, %%edi;"
|
||||
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
|
||||
#else
|
||||
__asm__ __volatile__
|
||||
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
|
||||
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
@@ -159,10 +133,6 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
|
||||
*edx = idlist[current].d;
|
||||
}
|
||||
|
||||
void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
|
||||
return cpuid (op, eax, ebx, ecx, edx);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // _MSC_VER
|
||||
@@ -211,44 +181,6 @@ int support_avx(){
|
||||
#endif
|
||||
}
|
||||
|
||||
int support_avx2(){
|
||||
#ifndef NO_AVX2
|
||||
int eax, ebx, ecx=0, edx;
|
||||
int ret=0;
|
||||
|
||||
if (!support_avx())
|
||||
return 0;
|
||||
cpuid(7, &eax, &ebx, &ecx, &edx);
|
||||
if((ebx & (1<<7)) != 0)
|
||||
ret=1; //OS supports AVX2
|
||||
return ret;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
int support_avx512(){
|
||||
#if !defined(NO_AVX) && !defined(NO_AVX512)
|
||||
int eax, ebx, ecx, edx;
|
||||
int ret=0;
|
||||
|
||||
if (!support_avx())
|
||||
return 0;
|
||||
cpuid(7, &eax, &ebx, &ecx, &edx);
|
||||
if((ebx & 32) != 32){
|
||||
ret=0; //OS does not even support AVX2
|
||||
}
|
||||
if((ebx & (1<<31)) != 0){
|
||||
xgetbv(0, &eax, &edx);
|
||||
if((eax & 0xe0) == 0xe0)
|
||||
ret=1; //OS supports AVX512VL
|
||||
}
|
||||
return ret;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
int get_vendor(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
@@ -271,7 +203,6 @@ int get_vendor(void){
|
||||
if (!strcmp(vendor, " SiS SiS SiS")) return VENDOR_SIS;
|
||||
if (!strcmp(vendor, "GenuineTMx86")) return VENDOR_TRANSMETA;
|
||||
if (!strcmp(vendor, "Geode by NSC")) return VENDOR_NSC;
|
||||
if (!strcmp(vendor, "HygonGenuine")) return VENDOR_HYGON;
|
||||
|
||||
if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL;
|
||||
|
||||
@@ -333,8 +264,6 @@ int get_cputype(int gettype){
|
||||
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2;
|
||||
#ifndef NO_AVX
|
||||
if (support_avx()) feature |= HAVE_AVX;
|
||||
if (support_avx2()) feature |= HAVE_AVX2;
|
||||
if (support_avx512()) feature |= HAVE_AVX512VL;
|
||||
if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
|
||||
#endif
|
||||
|
||||
@@ -383,9 +312,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
cpuid(0, &cpuid_level, &ebx, &ecx, &edx);
|
||||
|
||||
if (cpuid_level > 1) {
|
||||
int numcalls =0 ;
|
||||
|
||||
cpuid(2, &eax, &ebx, &ecx, &edx);
|
||||
numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries
|
||||
|
||||
info[ 0] = BITMASK(eax, 8, 0xff);
|
||||
info[ 1] = BITMASK(eax, 16, 0xff);
|
||||
info[ 2] = BITMASK(eax, 24, 0xff);
|
||||
@@ -406,6 +335,7 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
info[14] = BITMASK(edx, 24, 0xff);
|
||||
|
||||
for (i = 0; i < 15; i++){
|
||||
|
||||
switch (info[i]){
|
||||
|
||||
/* This table is from http://www.sandpile.org/ia32/cpuid.htm */
|
||||
@@ -706,14 +636,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
LD1.associative = 8;
|
||||
LD1.linesize = 64;
|
||||
break;
|
||||
case 0x63 :
|
||||
DTB.size = 2048;
|
||||
DTB.associative = 4;
|
||||
DTB.linesize = 32;
|
||||
LDTB.size = 4096;
|
||||
LDTB.associative= 4;
|
||||
LDTB.linesize = 32;
|
||||
break;
|
||||
case 0x66 :
|
||||
LD1.size = 8;
|
||||
LD1.associative = 4;
|
||||
@@ -745,14 +667,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
LC1.size = 64;
|
||||
LC1.associative = 8;
|
||||
break;
|
||||
case 0x76 :
|
||||
ITB.size = 2048;
|
||||
ITB.associative = 0;
|
||||
ITB.linesize = 8;
|
||||
LITB.size = 4096;
|
||||
LITB.associative= 0;
|
||||
LITB.linesize = 8;
|
||||
break;
|
||||
case 0x77 :
|
||||
LC1.size = 16;
|
||||
LC1.associative = 4;
|
||||
@@ -963,67 +877,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
}
|
||||
|
||||
if (get_vendor() == VENDOR_INTEL) {
|
||||
if(LD1.size<=0 || LC1.size<=0){
|
||||
//If we didn't detect L1 correctly before,
|
||||
int count;
|
||||
for (count=0;count <4;count++) {
|
||||
cpuid_count(4, count, &eax, &ebx, &ecx, &edx);
|
||||
switch (eax &0x1f) {
|
||||
case 0:
|
||||
continue;
|
||||
case 1:
|
||||
case 3:
|
||||
{
|
||||
switch ((eax >>5) &0x07)
|
||||
{
|
||||
case 1:
|
||||
{
|
||||
// fprintf(stderr,"L1 data cache...\n");
|
||||
int sets = ecx+1;
|
||||
int lines = (ebx & 0x0fff) +1;
|
||||
ebx>>=12;
|
||||
int part = (ebx&0x03ff)+1;
|
||||
ebx >>=10;
|
||||
int assoc = (ebx&0x03ff)+1;
|
||||
LD1.size = (assoc*part*lines*sets)/1024;
|
||||
LD1.associative = assoc;
|
||||
LD1.linesize= lines;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 2:
|
||||
{
|
||||
switch ((eax >>5) &0x07)
|
||||
{
|
||||
case 1:
|
||||
{
|
||||
// fprintf(stderr,"L1 instruction cache...\n");
|
||||
int sets = ecx+1;
|
||||
int lines = (ebx & 0x0fff) +1;
|
||||
ebx>>=12;
|
||||
int part = (ebx&0x03ff)+1;
|
||||
ebx >>=10;
|
||||
int assoc = (ebx&0x03ff)+1;
|
||||
LC1.size = (assoc*part*lines*sets)/1024;
|
||||
LC1.associative = assoc;
|
||||
LC1.linesize= lines;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
|
||||
if (cpuid_level >= 0x80000006) {
|
||||
if(L2.size<=0){
|
||||
@@ -1047,9 +900,7 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
}
|
||||
}
|
||||
|
||||
if ((get_vendor() == VENDOR_AMD) ||
|
||||
(get_vendor() == VENDOR_HYGON) ||
|
||||
(get_vendor() == VENDOR_CENTAUR)) {
|
||||
if ((get_vendor() == VENDOR_AMD) || (get_vendor() == VENDOR_CENTAUR)) {
|
||||
cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
LDTB.size = 4096;
|
||||
@@ -1211,7 +1062,7 @@ int get_cpuname(void){
|
||||
return CPUTYPE_CORE2;
|
||||
}
|
||||
break;
|
||||
case 1: // family 6 exmodel 1
|
||||
case 1:
|
||||
switch (model) {
|
||||
case 6:
|
||||
return CPUTYPE_CORE2;
|
||||
@@ -1228,7 +1079,7 @@ int get_cpuname(void){
|
||||
return CPUTYPE_DUNNINGTON;
|
||||
}
|
||||
break;
|
||||
case 2: // family 6 exmodel 2
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 5:
|
||||
//Intel Core (Clarkdale) / Core (Arrandale)
|
||||
@@ -1257,11 +1108,8 @@ int get_cpuname(void){
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 3: // family 6 exmodel 3
|
||||
case 3:
|
||||
switch (model) {
|
||||
case 7:
|
||||
// Bay Trail
|
||||
return CPUTYPE_ATOM;
|
||||
case 10:
|
||||
case 14:
|
||||
// Ivy Bridge
|
||||
@@ -1271,137 +1119,86 @@ int get_cpuname(void){
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 12:
|
||||
case 15:
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 13:
|
||||
//Broadwell
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 4: // family 6 exmodel 4
|
||||
case 4:
|
||||
switch (model) {
|
||||
case 5:
|
||||
case 6:
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 7:
|
||||
case 15:
|
||||
//Broadwell
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 14:
|
||||
//Skylake
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 12:
|
||||
// Braswell
|
||||
case 13:
|
||||
// Avoton
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 5: // family 6 exmodel 5
|
||||
case 5:
|
||||
switch (model) {
|
||||
case 6:
|
||||
//Broadwell
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 5:
|
||||
// Skylake X
|
||||
if(support_avx512())
|
||||
return CPUTYPE_SKYLAKEX;
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 14:
|
||||
// Skylake
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 7:
|
||||
// Xeon Phi Knights Landing
|
||||
if(support_avx2())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 12:
|
||||
// Apollo Lake
|
||||
case 15:
|
||||
// Denverton
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 6: // family 6 exmodel 6
|
||||
switch (model) {
|
||||
case 6: // Cannon Lake
|
||||
if(support_avx512())
|
||||
return CPUTYPE_SKYLAKEX;
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 7: // family 6 exmodel 7
|
||||
switch (model) {
|
||||
case 14: // Ice Lake
|
||||
if(support_avx512())
|
||||
return CPUTYPE_SKYLAKEX;
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 9:
|
||||
case 8:
|
||||
switch (model) {
|
||||
case 14: // Kaby Lake and refreshes
|
||||
if(support_avx2())
|
||||
return CPUTYPE_HASWELL;
|
||||
if(support_avx())
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x7:
|
||||
@@ -1432,12 +1229,8 @@ int get_cpuname(void){
|
||||
case 2:
|
||||
return CPUTYPE_OPTERON;
|
||||
case 1:
|
||||
case 3:
|
||||
case 7:
|
||||
case 10:
|
||||
return CPUTYPE_BARCELONA;
|
||||
case 5:
|
||||
return CPUTYPE_BOBCAT;
|
||||
case 6:
|
||||
switch (model) {
|
||||
case 1:
|
||||
@@ -1452,13 +1245,7 @@ int get_cpuname(void){
|
||||
return CPUTYPE_PILEDRIVER;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
case 5: // New EXCAVATOR CPUS
|
||||
if(support_avx())
|
||||
return CPUTYPE_EXCAVATOR;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
case 0:
|
||||
case 8:
|
||||
switch(exmodel){
|
||||
case 1: //AMD Trinity
|
||||
if(support_avx())
|
||||
@@ -1480,47 +1267,14 @@ int get_cpuname(void){
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 8:
|
||||
switch (model) {
|
||||
case 1:
|
||||
// AMD Ryzen
|
||||
case 8:
|
||||
// AMD Ryzen2
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_ZEN;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_BARCELONA;
|
||||
}
|
||||
case 5:
|
||||
return CPUTYPE_BOBCAT;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return CPUTYPE_AMD_UNKNOWN;
|
||||
}
|
||||
|
||||
if (vendor == VENDOR_HYGON){
|
||||
switch (family) {
|
||||
case 0xf:
|
||||
switch (exfamily) {
|
||||
case 9:
|
||||
//Hygon Dhyana
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_ZEN;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE; // closer in architecture to Sandy Bridge than to Excavator
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_BARCELONA;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return CPUTYPE_HYGON_UNKNOWN;
|
||||
}
|
||||
|
||||
if (vendor == VENDOR_CYRIX){
|
||||
switch (family) {
|
||||
case 0x4:
|
||||
@@ -1641,9 +1395,6 @@ static char *cpuname[] = {
|
||||
"HASWELL",
|
||||
"STEAMROLLER",
|
||||
"EXCAVATOR",
|
||||
"ZEN",
|
||||
"SKYLAKEX",
|
||||
"DHYANA"
|
||||
};
|
||||
|
||||
static char *lowercpuname[] = {
|
||||
@@ -1697,13 +1448,10 @@ static char *lowercpuname[] = {
|
||||
"haswell",
|
||||
"steamroller",
|
||||
"excavator",
|
||||
"zen",
|
||||
"skylakex",
|
||||
"dhyana"
|
||||
};
|
||||
|
||||
static char *corename[] = {
|
||||
"UNKNOWN",
|
||||
"UNKOWN",
|
||||
"80486",
|
||||
"P5",
|
||||
"P6",
|
||||
@@ -1730,9 +1478,6 @@ static char *corename[] = {
|
||||
"HASWELL",
|
||||
"STEAMROLLER",
|
||||
"EXCAVATOR",
|
||||
"ZEN",
|
||||
"SKYLAKEX",
|
||||
"DHYANA"
|
||||
};
|
||||
|
||||
static char *corename_lower[] = {
|
||||
@@ -1763,9 +1508,6 @@ static char *corename_lower[] = {
|
||||
"haswell",
|
||||
"steamroller",
|
||||
"excavator",
|
||||
"zen",
|
||||
"skylakex",
|
||||
"dhyana"
|
||||
};
|
||||
|
||||
|
||||
@@ -1871,8 +1613,6 @@ int get_coretype(void){
|
||||
break;
|
||||
case 3:
|
||||
switch (model) {
|
||||
case 7:
|
||||
return CORE_ATOM;
|
||||
case 10:
|
||||
case 14:
|
||||
if(support_avx())
|
||||
@@ -1934,11 +1674,6 @@ int get_coretype(void){
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
case 12:
|
||||
// Braswell
|
||||
case 13:
|
||||
// Avoton
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
@@ -1954,19 +1689,6 @@ int get_coretype(void){
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
case 5:
|
||||
// Skylake X
|
||||
#ifndef NO_AVX512
|
||||
return CORE_SKYLAKEX;
|
||||
#else
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
#endif
|
||||
case 14:
|
||||
// Skylake
|
||||
if(support_avx())
|
||||
@@ -1977,33 +1699,8 @@ int get_coretype(void){
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
case 7:
|
||||
// Phi Knights Landing
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
case 12:
|
||||
// Apollo Lake
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 9:
|
||||
case 8:
|
||||
if (model == 14) { // Kaby Lake
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -2033,13 +1730,8 @@ int get_coretype(void){
|
||||
return CORE_PILEDRIVER;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
case 5: // New EXCAVATOR
|
||||
if(support_avx())
|
||||
return CORE_EXCAVATOR;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
|
||||
case 0:
|
||||
case 8:
|
||||
switch(exmodel){
|
||||
case 1: //AMD Trinity
|
||||
if(support_avx())
|
||||
@@ -2061,41 +1753,9 @@ int get_coretype(void){
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else if (exfamily == 8) {
|
||||
switch (model) {
|
||||
case 1:
|
||||
// AMD Ryzen
|
||||
case 8:
|
||||
// Ryzen 2
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_ZEN;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
|
||||
#endif
|
||||
else
|
||||
return CORE_BARCELONA;
|
||||
}
|
||||
} else {
|
||||
return CORE_BARCELONA;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (vendor == VENDOR_HYGON){
|
||||
if (family == 0xf){
|
||||
if (exfamily == 9) {
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_ZEN;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE; // closer in architecture to Sandy Bridge than to Excavator
|
||||
#endif
|
||||
else
|
||||
return CORE_BARCELONA;
|
||||
} else {
|
||||
return CORE_BARCELONA;
|
||||
}
|
||||
|
||||
}else return CORE_BARCELONA;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2185,8 +1845,6 @@ void get_cpuconfig(void){
|
||||
if (features & HAVE_SSE4A) printf("#define HAVE_SSE4A\n");
|
||||
if (features & HAVE_SSE5 ) printf("#define HAVE_SSSE5\n");
|
||||
if (features & HAVE_AVX ) printf("#define HAVE_AVX\n");
|
||||
if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n");
|
||||
if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n");
|
||||
if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
|
||||
if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n");
|
||||
if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n");
|
||||
@@ -2255,8 +1913,6 @@ void get_sse(void){
|
||||
if (features & HAVE_SSE4A) printf("HAVE_SSE4A=1\n");
|
||||
if (features & HAVE_SSE5 ) printf("HAVE_SSSE5=1\n");
|
||||
if (features & HAVE_AVX ) printf("HAVE_AVX=1\n");
|
||||
if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n");
|
||||
if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n");
|
||||
if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
|
||||
if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n");
|
||||
if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n");
|
||||
|
||||
127
cpuid_zarch.c
127
cpuid_zarch.c
@@ -1,127 +0,0 @@
|
||||
/**************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
/* CPU identifiers returned by detect(); they index the name tables below. */
#define CPU_GENERIC     0
#define CPU_Z13         1
#define CPU_Z14         2

/* Upper-case names, indexed by CPU_* identifier. */
static char *cpuname[] = {
  "ZARCH_GENERIC",
  "Z13",
  "Z14"
};

/* Lower-case names, indexed by CPU_* identifier. */
static char *cpuname_lower[] = {
  "zarch_generic",
  "z13",
  "z14"
};

/*
 * Identify the machine from the first "Type" line of /proc/sysinfo.
 *
 * Returns CPU_Z13 when the text after the colon contains "2964" or "2965",
 * CPU_Z14 when it contains "3906" or "3907", and CPU_GENERIC in every other
 * case: the file cannot be opened, it has no "Type" line, that line has no
 * colon, or the machine type is not one of the above.
 */
int detect(void)
{
  FILE *infile;
  char buffer[512];
  char *type = NULL;

  infile = fopen("/proc/sysinfo", "r");
  if (infile == NULL)
    return CPU_GENERIC;   /* no sysinfo available: fall back to generic */

  while (fgets(buffer, sizeof(buffer), infile)){
    if (!strncmp("Type", buffer, 4)){
      /* Keep the pointer at the colon itself; stepping a fixed two
         characters past it could run off the end of a short line. */
      type = strchr(buffer, ':');
      break;
    }
  }

  fclose(infile);

  /* buffer still holds the matched line, so `type` stays valid here. */
  if (type == NULL)
    return CPU_GENERIC;

  if (strstr(type, "2964")) return CPU_Z13;
  if (strstr(type, "2965")) return CPU_Z13;
  if (strstr(type, "3906")) return CPU_Z14;
  if (strstr(type, "3907")) return CPU_Z14;

  return CPU_GENERIC;
}
|
||||
|
||||
void get_libname(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
printf("%s", cpuname_lower[d]);
|
||||
}
|
||||
|
||||
char *get_corename(void)
|
||||
{
|
||||
return cpuname[detect()];
|
||||
}
|
||||
|
||||
/* Write the architecture name "ZARCH" to stdout, without a newline. */
void get_architecture(void)
{
  fputs("ZARCH", stdout);
}
|
||||
|
||||
void get_subarchitecture(void)
|
||||
{
|
||||
int d = detect();
|
||||
printf("%s", cpuname[d]);
|
||||
}
|
||||
|
||||
/* Write the sub-directory name "zarch" to stdout, without a newline. */
void get_subdirname(void)
{
  fputs("zarch", stdout);
}
|
||||
|
||||
|
||||
void get_cpuconfig(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
switch (d){
|
||||
case CPU_GENERIC:
|
||||
printf("#define ZARCH_GENERIC\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
break;
|
||||
case CPU_Z13:
|
||||
printf("#define Z13\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
break;
|
||||
case CPU_Z14:
|
||||
printf("#define Z14\n");
|
||||
printf("#define L1_DATA_SIZE 131072\n");
|
||||
printf("#define L1_DATA_LINESIZE 256\n");
|
||||
printf("#define L1_DATA_ASSOCIATIVE 8\n");
|
||||
printf("#define L2_SIZE 4194304\n");
|
||||
printf("#define L2_LINESIZE 256\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
20
ctest.c
20
ctest.c
@@ -60,14 +60,6 @@ OS_FREEBSD
|
||||
OS_NETBSD
|
||||
#endif
|
||||
|
||||
#if defined(__OpenBSD__)
|
||||
OS_OPENBSD
|
||||
#endif
|
||||
|
||||
#if defined(__DragonFly__)
|
||||
OS_DRAGONFLY
|
||||
#endif
|
||||
|
||||
#if defined(__sun)
|
||||
OS_SUNOS
|
||||
#endif
|
||||
@@ -101,10 +93,6 @@ OS_INTERIX
|
||||
OS_LINUX
|
||||
#endif
|
||||
|
||||
#if defined(__HAIKU__)
|
||||
OS_HAIKU
|
||||
#endif
|
||||
|
||||
#if defined(__i386) || defined(_X86)
|
||||
ARCH_X86
|
||||
#endif
|
||||
@@ -113,20 +101,16 @@ ARCH_X86
|
||||
ARCH_X86_64
|
||||
#endif
|
||||
|
||||
#if defined(__powerpc___) || defined(__PPC__) || defined(_POWER) || defined(__POWERPC__)
|
||||
#if defined(__powerpc___) || defined(__PPC__) || defined(_POWER)
|
||||
ARCH_POWER
|
||||
#endif
|
||||
|
||||
#if defined(__s390x__) || defined(__zarch__)
|
||||
ARCH_ZARCH
|
||||
#endif
|
||||
|
||||
#ifdef __mips64
|
||||
ARCH_MIPS64
|
||||
#endif
|
||||
|
||||
#if defined(__mips32) || defined(__mips)
|
||||
ARCH_MIPS
|
||||
ARCH_MIPS32
|
||||
#endif
|
||||
|
||||
#ifdef __alpha
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
include_directories(${PROJECT_SOURCE_DIR})
|
||||
include_directories(${PROJECT_BINARY_DIR})
|
||||
include_directories(${CMAKE_SOURCE_DIR})
|
||||
|
||||
enable_language(Fortran)
|
||||
|
||||
@@ -16,7 +15,7 @@ foreach(float_type ${FLOAT_TYPES})
|
||||
add_executable(x${float_char}cblat1
|
||||
c_${float_char}blat1.f
|
||||
c_${float_char}blas1.c)
|
||||
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME})
|
||||
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}_static)
|
||||
add_test(NAME "x${float_char}cblat1"
|
||||
COMMAND "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat1")
|
||||
|
||||
@@ -28,7 +27,7 @@ foreach(float_type ${FLOAT_TYPES})
|
||||
auxiliary.c
|
||||
c_xerbla.c
|
||||
constant.c)
|
||||
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME})
|
||||
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}_static)
|
||||
add_test(NAME "x${float_char}cblat2"
|
||||
COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat2" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in2")
|
||||
|
||||
@@ -40,7 +39,7 @@ foreach(float_type ${FLOAT_TYPES})
|
||||
auxiliary.c
|
||||
c_xerbla.c
|
||||
constant.c)
|
||||
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME})
|
||||
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}_static)
|
||||
add_test(NAME "x${float_char}cblat3"
|
||||
COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat3" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3")
|
||||
|
||||
|
||||
@@ -6,8 +6,6 @@ TOPDIR = ..
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
override CFLAGS += -DADD$(BU) -DCBLAS
|
||||
override TARGET_ARCH=
|
||||
override TARGET_MACH=
|
||||
|
||||
LIB = $(TOPDIR)/$(LIBNAME)
|
||||
|
||||
@@ -44,7 +42,6 @@ ztestl3o_3m = c_zblas3_3m.o c_z3chke_3m.o auxiliary.o c_xerbla.o constant.o
|
||||
all :: all1 all2 all3
|
||||
|
||||
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
|
||||
ifndef CROSS
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
OMP_NUM_THREADS=2 ./xscblat1
|
||||
OMP_NUM_THREADS=2 ./xdcblat1
|
||||
@@ -56,10 +53,8 @@ else
|
||||
OPENBLAS_NUM_THREADS=2 ./xccblat1
|
||||
OPENBLAS_NUM_THREADS=2 ./xzcblat1
|
||||
endif
|
||||
endif
|
||||
|
||||
all2: xscblat2 xdcblat2 xccblat2 xzcblat2
|
||||
ifndef CROSS
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
OMP_NUM_THREADS=2 ./xscblat2 < sin2
|
||||
OMP_NUM_THREADS=2 ./xdcblat2 < din2
|
||||
@@ -71,10 +66,8 @@ else
|
||||
OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2
|
||||
OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2
|
||||
endif
|
||||
endif
|
||||
|
||||
all3: xscblat3 xdcblat3 xccblat3 xzcblat3
|
||||
ifndef CROSS
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
OMP_NUM_THREADS=2 ./xscblat3 < sin3
|
||||
OMP_NUM_THREADS=2 ./xdcblat3 < din3
|
||||
@@ -95,7 +88,6 @@ else
|
||||
OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
|
||||
OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
|
||||
@@ -104,13 +96,7 @@ clean ::
|
||||
rm -f x*
|
||||
|
||||
FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS)
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
CEXTRALIB = -lomp
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
CEXTRALIB =
|
||||
|
||||
# Single real
|
||||
xscblat1: $(stestl1o) c_sblat1.o $(TOPDIR)/$(LIBNAME)
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
#include "common.h"
|
||||
#include "cblas_test.h"
|
||||
|
||||
void F77_caxpy(const int *N, OPENBLAS_CONST void *alpha, void *X,
|
||||
void F77_caxpy(const int *N, const void *alpha, void *X,
|
||||
const int *incX, void *Y, const int *incY)
|
||||
{
|
||||
cblas_caxpy(*N, alpha, X, *incX, Y, *incY);
|
||||
@@ -58,13 +58,13 @@ void F77_cswap( const int *N, void *X, const int *incX,
|
||||
return;
|
||||
}
|
||||
|
||||
int F77_icamax(const int *N, OPENBLAS_CONST void *X, const int *incX)
|
||||
int F77_icamax(const int *N, const void *X, const int *incX)
|
||||
{
|
||||
if (*N < 1 || *incX < 1) return(0);
|
||||
return (cblas_icamax(*N, X, *incX)+1);
|
||||
}
|
||||
|
||||
float F77_scnrm2(const int *N, OPENBLAS_CONST void *X, const int *incX)
|
||||
float F77_scnrm2(const int *N, const void *X, const int *incX)
|
||||
{
|
||||
return cblas_scnrm2(*N, X, *incX);
|
||||
}
|
||||
|
||||
@@ -9,9 +9,9 @@
|
||||
#include "cblas_test.h"
|
||||
|
||||
void F77_cgemv(int *order, char *transp, int *m, int *n,
|
||||
OPENBLAS_CONST void *alpha,
|
||||
CBLAS_TEST_COMPLEX *a, int *lda, OPENBLAS_CONST void *x, int *incx,
|
||||
OPENBLAS_CONST void *beta, void *y, int *incy) {
|
||||
const void *alpha,
|
||||
CBLAS_TEST_COMPLEX *a, int *lda, const void *x, int *incx,
|
||||
const void *beta, void *y, int *incy) {
|
||||
|
||||
CBLAS_TEST_COMPLEX *A;
|
||||
int i,j,LDA;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user