Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5f998efd7b | ||
|
|
88a35ff457 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -5,7 +5,6 @@
|
||||
*.def
|
||||
*.o
|
||||
*.out
|
||||
*.tmp
|
||||
lapack-3.1.1
|
||||
lapack-3.1.1.tgz
|
||||
lapack-3.4.1
|
||||
|
||||
234
.travis.yml
234
.travis.yml
@@ -1,210 +1,5 @@
|
||||
# XXX: Precise is already deprecated, new default is Trusty.
|
||||
# https://blog.travis-ci.com/2017-07-11-trusty-as-default-linux-is-coming
|
||||
dist: precise
|
||||
sudo: true
|
||||
language: c
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- &test-ubuntu
|
||||
os: linux
|
||||
compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gfortran
|
||||
before_script: &common-before
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
|
||||
script:
|
||||
- set -e
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64 INTERFACE64=1"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
compiler: clang
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64 CC=clang"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
compiler: clang
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64 INTERFACE64=1 CC=clang"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gcc-multilib
|
||||
- gfortran-multilib
|
||||
env:
|
||||
- TARGET_BOX=LINUX32
|
||||
- BTYPE="BINARY=32"
|
||||
|
||||
- os: linux
|
||||
compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- binutils-mingw-w64-x86-64
|
||||
- gcc-mingw-w64-x86-64
|
||||
- gfortran-mingw-w64-x86-64
|
||||
before_script: *common-before
|
||||
script:
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- TARGET_BOX=WIN64
|
||||
- BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
|
||||
|
||||
# Build & test on Alpine Linux inside chroot, i.e. on system with musl libc.
|
||||
# These jobs need sudo, so Travis runs them on VM-based infrastructure
|
||||
# which is slower than container-based infrastructure used for jobs
|
||||
# that don't require sudo.
|
||||
- &test-alpine
|
||||
os: linux
|
||||
dist: trusty
|
||||
sudo: true
|
||||
language: minimal
|
||||
before_install:
|
||||
- "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.9.0/alpine-chroot-install' \
|
||||
&& echo 'e5dfbbdc0c4b3363b99334510976c86bfa6cb251 alpine-chroot-install' | sha1sum -c || exit 1"
|
||||
- alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
|
||||
install:
|
||||
- sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
|
||||
before_script: *common-before
|
||||
script:
|
||||
- set -e
|
||||
# XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
|
||||
- alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
|
||||
- alpine make -C test $COMMON_FLAGS $BTYPE
|
||||
- alpine make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- alpine make -C utest $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- TARGET_BOX=LINUX64_MUSL
|
||||
- BTYPE="BINARY=64"
|
||||
|
||||
# XXX: This job segfaults in TESTS OF THE COMPLEX LEVEL 3 BLAS,
|
||||
# but only on Travis CI, cannot reproduce it elsewhere.
|
||||
#- &test-alpine-openmp
|
||||
# <<: *test-alpine
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64_MUSL
|
||||
# - BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
|
||||
- <<: *test-alpine
|
||||
env:
|
||||
- TARGET_BOX=LINUX64_MUSL
|
||||
- BTYPE="BINARY=64 INTERFACE64=1"
|
||||
|
||||
# Build with the same flags as Alpine does in its OpenBLAS package.
|
||||
- <<: *test-alpine
|
||||
env:
|
||||
- TARGET_BOX=LINUX64_MUSL
|
||||
- BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=CORE2"
|
||||
|
||||
- &test-cmake
|
||||
os: linux
|
||||
compiler: clang
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gfortran
|
||||
- cmake
|
||||
dist: trusty
|
||||
sudo: true
|
||||
before_script:
|
||||
- COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32"
|
||||
script:
|
||||
- set -e
|
||||
- mkdir build
|
||||
- CONFIG=Release
|
||||
- cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG
|
||||
- cmake --build build --config $CONFIG -- -j2
|
||||
env:
|
||||
- CMAKE=1
|
||||
- <<: *test-cmake
|
||||
env:
|
||||
- CMAKE=1 CMAKE_ARGS="-DNOFORTRAN=1"
|
||||
- <<: *test-cmake
|
||||
compiler: gcc
|
||||
env:
|
||||
- CMAKE=1
|
||||
|
||||
- &test-macos
|
||||
os: osx
|
||||
osx_image: xcode8
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
|
||||
- brew update
|
||||
- brew install gcc # for gfortran
|
||||
script:
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- BTYPE="BINARY=64 INTERFACE64=1"
|
||||
|
||||
- <<: *test-macos
|
||||
env:
|
||||
- BTYPE="BINARY=32"
|
||||
|
||||
- &emulated-arm
|
||||
dist: trusty
|
||||
sudo: required
|
||||
services: docker
|
||||
env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=gcc
|
||||
name: "Emulated Build for ARMV6 with gcc"
|
||||
before_install: sudo docker run --rm --privileged multiarch/qemu-user-static:register --reset
|
||||
script: |
|
||||
echo "FROM openblas/alpine:${IMAGE_ARCH}
|
||||
COPY . /tmp/openblas
|
||||
RUN mkdir /tmp/openblas/build && \
|
||||
cd /tmp/openblas/build && \
|
||||
CC=${COMPILER} cmake -D DYNAMIC_ARCH=OFF \
|
||||
-D TARGET=${TARGET_ARCH} \
|
||||
-D BUILD_SHARED_LIBS=ON \
|
||||
-D BUILD_WITHOUT_LAPACK=ON \
|
||||
-D BUILD_WITHOUT_CBLAS=ON \
|
||||
-D CMAKE_BUILD_TYPE=Release ../ && \
|
||||
cmake --build ." > Dockerfile
|
||||
docker build .
|
||||
- <<: *emulated-arm
|
||||
env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=clang
|
||||
name: "Emulated Build for ARMV6 with clang"
|
||||
- <<: *emulated-arm
|
||||
env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=gcc
|
||||
name: "Emulated Build for ARMV8 with gcc"
|
||||
- <<: *emulated-arm
|
||||
env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=clang
|
||||
name: "Emulated Build for ARMV8 with clang"
|
||||
|
||||
allow_failures:
|
||||
- env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=gcc
|
||||
- env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=clang
|
||||
- env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=gcc
|
||||
- env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=clang
|
||||
|
||||
# whitelist
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- develop
|
||||
|
||||
notifications:
|
||||
webhooks:
|
||||
urls:
|
||||
@@ -212,3 +7,32 @@ notifications:
|
||||
on_success: change # options: [always|never|change] default: always
|
||||
on_failure: always # options: [always|never|change] default: always
|
||||
on_start: never # options: [always|never|change] default: always
|
||||
|
||||
compiler:
|
||||
- gcc
|
||||
|
||||
env:
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64"
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 INTERFACE64=1"
|
||||
- TARGET_BOX=LINUX32 BTYPE="BINARY=32"
|
||||
- TARGET_BOX=WIN64 BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
|
||||
|
||||
before_install:
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq gfortran
|
||||
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
|
||||
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
|
||||
|
||||
script:
|
||||
- set -e
|
||||
- make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
|
||||
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
|
||||
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
|
||||
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
|
||||
|
||||
# whitelist
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- develop
|
||||
339
CMakeLists.txt
339
CMakeLists.txt
@@ -3,39 +3,52 @@
|
||||
##
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.5)
|
||||
project(OpenBLAS C ASM)
|
||||
project(OpenBLAS)
|
||||
set(OpenBLAS_MAJOR_VERSION 0)
|
||||
set(OpenBLAS_MINOR_VERSION 3)
|
||||
set(OpenBLAS_PATCH_VERSION 5)
|
||||
set(OpenBLAS_MINOR_VERSION 2)
|
||||
set(OpenBLAS_PATCH_VERSION 20)
|
||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||
|
||||
enable_language(ASM)
|
||||
enable_language(C)
|
||||
|
||||
# Adhere to GNU filesystem layout conventions
|
||||
include(GNUInstallDirs)
|
||||
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
if(MSVC)
|
||||
set(OpenBLAS_LIBNAME libopenblas)
|
||||
else()
|
||||
set(OpenBLAS_LIBNAME openblas)
|
||||
endif()
|
||||
|
||||
#######
|
||||
if(MSVC)
|
||||
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
|
||||
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
|
||||
endif()
|
||||
option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS functions" OFF)
|
||||
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64 only)" OFF)
|
||||
option(DYNAMIC_OLDER "Include specific support for older cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF)
|
||||
option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)" OFF)
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoids conflicts with other BLAS libraries, especially when using
|
||||
# 64 bit integer interfaces in OpenBLAS.
|
||||
|
||||
set(SYMBOLPREFIX "" CACHE STRING "Add a prefix to all exported symbol names in the shared library to avoid conflicts with other BLAS libraries" )
|
||||
set(SYMBOLSUFFIX "" CACHE STRING "Add a suffix to all exported symbol names in the shared library, e.g. _64 for INTERFACE64 builds" )
|
||||
option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF)
|
||||
option(BUILD_DEBUG "Build Debug Version" OFF)
|
||||
#######
|
||||
if(BUILD_WITHOUT_LAPACK)
|
||||
set(NO_LAPACK 1)
|
||||
set(NO_LAPACKE 1)
|
||||
endif()
|
||||
|
||||
if(CMAKE_CONFIGURATION_TYPES) # multiconfig generator?
|
||||
set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
|
||||
set(CMAKE_BUILD_TYPE
|
||||
Debug Debug
|
||||
Release Release
|
||||
)
|
||||
else()
|
||||
if( NOT CMAKE_BUILD_TYPE )
|
||||
if(BUILD_DEBUG)
|
||||
set(CMAKE_BUILD_TYPE Debug)
|
||||
else()
|
||||
set(CMAKE_BUILD_TYPE Release)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(BUILD_WITHOUT_CBLAS)
|
||||
set(NO_CBLAS 1)
|
||||
endif()
|
||||
@@ -43,13 +56,11 @@ endif()
|
||||
#######
|
||||
|
||||
|
||||
message(WARNING "CMake support is experimental. It does not yet support all build options and may not produce the same Makefiles that OpenBLAS ships with.")
|
||||
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
|
||||
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")
|
||||
|
||||
set(OpenBLAS_LIBNAME openblas${SUFFIX64_UNDERSCORE})
|
||||
|
||||
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
|
||||
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
@@ -63,9 +74,6 @@ endif ()
|
||||
set(SUBDIRS ${BLASDIRS})
|
||||
if (NOT NO_LAPACK)
|
||||
list(APPEND SUBDIRS lapack)
|
||||
if(BUILD_RELAPACK)
|
||||
list(APPEND SUBDIRS relapack/src)
|
||||
endif()
|
||||
endif ()
|
||||
|
||||
# set which float types we want to build for
|
||||
@@ -77,10 +85,6 @@ if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_
|
||||
set(BUILD_COMPLEX16 true)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED BUILD_MATGEN)
|
||||
set(BUILD_MATGEN true)
|
||||
endif()
|
||||
|
||||
set(FLOAT_TYPES "")
|
||||
if (BUILD_SINGLE)
|
||||
message(STATUS "Building Single Precision")
|
||||
@@ -102,10 +106,19 @@ if (BUILD_COMPLEX16)
|
||||
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
|
||||
endif ()
|
||||
|
||||
set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench)
|
||||
|
||||
# all :: libs netlib tests shared
|
||||
|
||||
# libs :
|
||||
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
|
||||
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
|
||||
endif ()
|
||||
|
||||
if (${NO_STATIC} AND ${NO_SHARED})
|
||||
message(FATAL_ERROR "Neither static nor shared are enabled.")
|
||||
endif ()
|
||||
|
||||
#Set default output directory
|
||||
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
@@ -127,210 +140,134 @@ endforeach ()
|
||||
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
|
||||
if (NOT NOFORTRAN AND NOT NO_LAPACK)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
|
||||
if (NOT NO_LAPACKE)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
|
||||
endif ()
|
||||
if (NOT NO_LAPACKE)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Only generate .def for dll on MSVC and always produce pdb files for debug and release
|
||||
if(MSVC)
|
||||
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4)
|
||||
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
|
||||
endif()
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
|
||||
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
|
||||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
|
||||
endif()
|
||||
|
||||
if (${DYNAMIC_ARCH})
|
||||
add_subdirectory(kernel)
|
||||
foreach(TARGET_CORE ${DYNAMIC_CORE})
|
||||
message("${TARGET_CORE}")
|
||||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:kernel_${TARGET_CORE}>")
|
||||
endforeach()
|
||||
endif ()
|
||||
|
||||
# Only build shared libs for MSVC
|
||||
if (MSVC)
|
||||
set(BUILD_SHARED_LIBS ON)
|
||||
endif()
|
||||
|
||||
|
||||
# add objects to the openblas lib
|
||||
add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${RELA_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include>)
|
||||
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||
|
||||
# Android needs to explicitly link against libm
|
||||
if(ANDROID)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} m)
|
||||
endif()
|
||||
|
||||
# Handle MSVC exports
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
|
||||
else()
|
||||
# Creates verbose .def file (51KB vs 18KB)
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true)
|
||||
endif()
|
||||
endif()
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
|
||||
|
||||
# Set output for libopenblas
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES EXPORT_NAME "OpenBLAS")
|
||||
|
||||
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
|
||||
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
|
||||
|
||||
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
endforeach()
|
||||
|
||||
enable_testing()
|
||||
add_subdirectory(utest)
|
||||
|
||||
if (USE_THREAD)
|
||||
# Add threading library to linker
|
||||
find_package(Threads)
|
||||
if (THREADS_HAVE_PTHREAD_ARG)
|
||||
set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY COMPILE_OPTIONS "-pthread")
|
||||
set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread")
|
||||
endif()
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT})
|
||||
if (NOT MSVC)
|
||||
#only build shared library for MSVC
|
||||
|
||||
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
|
||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
|
||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
|
||||
|
||||
if(SMP)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} pthread)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
|
||||
endif()
|
||||
|
||||
if (MSVC OR NOT NOFORTRAN)
|
||||
# Broken without fortran on unix
|
||||
add_subdirectory(utest)
|
||||
#build test and ctest
|
||||
add_subdirectory(test)
|
||||
if(NOT NO_CBLAS)
|
||||
add_subdirectory(ctest)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (NOT MSVC AND NOT NOFORTRAN)
|
||||
# Build test and ctest
|
||||
add_subdirectory(test)
|
||||
if(NOT NO_CBLAS)
|
||||
add_subdirectory(ctest)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
|
||||
VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
|
||||
SOVERSION ${OpenBLAS_MAJOR_VERSION}
|
||||
)
|
||||
|
||||
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFIX} STREQUAL "")
|
||||
if (NOT DEFINED ARCH)
|
||||
set(ARCH_IN "x86_64")
|
||||
else()
|
||||
set(ARCH_IN ${ARCH})
|
||||
endif()
|
||||
|
||||
if (${CORE} STREQUAL "generic")
|
||||
set(ARCH_IN "GENERIC")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED EXPRECISION)
|
||||
set(EXPRECISION_IN 0)
|
||||
else()
|
||||
set(EXPRECISION_IN ${EXPRECISION})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NO_CBLAS)
|
||||
set(NO_CBLAS_IN 0)
|
||||
else()
|
||||
set(NO_CBLAS_IN ${NO_CBLAS})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NO_LAPACK)
|
||||
set(NO_LAPACK_IN 0)
|
||||
else()
|
||||
set(NO_LAPACK_IN ${NO_LAPACK})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NO_LAPACKE)
|
||||
set(NO_LAPACKE_IN 0)
|
||||
else()
|
||||
set(NO_LAPACKE_IN ${NO_LAPACKE})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NEED2UNDERSCORES)
|
||||
set(NEED2UNDERSCORES_IN 0)
|
||||
else()
|
||||
set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED ONLY_CBLAS)
|
||||
set(ONLY_CBLAS_IN 0)
|
||||
else()
|
||||
set(ONLY_CBLAS_IN ${ONLY_CBLAS})
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED BU)
|
||||
set(BU _)
|
||||
endif()
|
||||
|
||||
if (NOT ${SYMBOLPREFIX} STREQUAL "")
|
||||
message(STATUS "adding prefix ${SYMBOLPREFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
|
||||
endif()
|
||||
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
|
||||
message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
|
||||
endif()
|
||||
add_custom_command(TARGET ${OpenBLAS_LIBNAME} POST_BUILD
|
||||
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" > ${PROJECT_BINARY_DIR}/objcopy.def
|
||||
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
|
||||
COMMENT "renaming symbols"
|
||||
)
|
||||
endif()
|
||||
|
||||
# TODO: Why is the config saved here? Is this necessary with CMake?
|
||||
#Save the config files for installation
|
||||
# @cp Makefile.conf Makefile.conf_last
|
||||
# @cp config.h config_last.h
|
||||
#ifdef QUAD_PRECISION
|
||||
# @echo "#define QUAD_PRECISION">> config_last.h
|
||||
#endif
|
||||
#ifeq ($(EXPRECISION), 1)
|
||||
# @echo "#define EXPRECISION">> config_last.h
|
||||
#endif
|
||||
###
|
||||
#ifeq ($(DYNAMIC_ARCH), 1)
|
||||
# @$(MAKE) -C kernel commonlibs || exit 1
|
||||
# @for d in $(DYNAMIC_CORE) ; \
|
||||
# do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
# done
|
||||
# @echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
#endif
|
||||
#ifdef USE_THREAD
|
||||
# @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
|
||||
#endif
|
||||
# @touch lib.grd
|
||||
|
||||
# Install project
|
||||
|
||||
# Install libraries
|
||||
install(TARGETS ${OpenBLAS_LIBNAME}
|
||||
EXPORT "OpenBLAS${SUFFIX64}Targets"
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
|
||||
|
||||
# Install headers
|
||||
set(CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
|
||||
set(CMAKE_INSTALL_FULL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
|
||||
# Install include files
|
||||
set (GENCONFIG_BIN ${CMAKE_BINARY_DIR}/gen_config_h${CMAKE_EXECUTABLE_SUFFIX})
|
||||
ADD_CUSTOM_COMMAND(
|
||||
OUTPUT ${CMAKE_BINARY_DIR}/openblas_config.h
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/config.h
|
||||
COMMAND ${GENCONFIG_BIN} ${CMAKE_CURRENT_SOURCE_DIR}/config.h ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h > ${CMAKE_BINARY_DIR}/openblas_config.h
|
||||
)
|
||||
|
||||
message(STATUS "Generating openblas_config.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
ADD_CUSTOM_TARGET(genconfig
|
||||
ALL
|
||||
DEPENDS openblas_config.h
|
||||
)
|
||||
add_dependencies(genconfig ${OpenBLAS_LIBNAME})
|
||||
|
||||
set(OPENBLAS_CONFIG_H ${CMAKE_BINARY_DIR}/openblas_config.h)
|
||||
file(WRITE ${OPENBLAS_CONFIG_H} "#ifndef OPENBLAS_CONFIG_H\n")
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_CONFIG_H\n")
|
||||
file(STRINGS ${PROJECT_BINARY_DIR}/config.h __lines)
|
||||
foreach(line ${__lines})
|
||||
string(REPLACE "#define " "" line ${line})
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_${line}\n")
|
||||
endforeach()
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_VERSION \"OpenBLAS ${OpenBLAS_VERSION}\"\n")
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS)
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "${OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS}\n")
|
||||
file(APPEND ${OPENBLAS_CONFIG_H} "#endif /* OPENBLAS_CONFIG_H */\n")
|
||||
install (FILES ${OPENBLAS_CONFIG_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
install (FILES ${CMAKE_BINARY_DIR}/openblas_config.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
if(NOT NOFORTRAN)
|
||||
message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
|
||||
set(F77BLAS_H ${CMAKE_BINARY_DIR}/f77blas.h)
|
||||
file(WRITE ${F77BLAS_H} "#ifndef OPENBLAS_F77BLAS_H\n")
|
||||
file(APPEND ${F77BLAS_H} "#define OPENBLAS_F77BLAS_H\n")
|
||||
file(APPEND ${F77BLAS_H} "#include \"openblas_config.h\"\n")
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h COMMON_INTERFACE_H_CONTENTS)
|
||||
file(APPEND ${F77BLAS_H} "${COMMON_INTERFACE_H_CONTENTS}\n")
|
||||
file(APPEND ${F77BLAS_H} "#endif")
|
||||
install (FILES ${F77BLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
endif()
|
||||
ADD_CUSTOM_TARGET(genf77blas
|
||||
ALL
|
||||
COMMAND ${AWK} 'BEGIN{print \"\#ifndef OPENBLAS_F77BLAS_H\" \; print \"\#define OPENBLAS_F77BLAS_H\" \; print \"\#include \\"openblas_config.h\\" \"}; NF {print}; END{print \"\#endif\"}' ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h > ${CMAKE_BINARY_DIR}/f77blas.h
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/config.h
|
||||
)
|
||||
add_dependencies(genf77blas ${OpenBLAS_LIBNAME})
|
||||
|
||||
install (FILES ${CMAKE_BINARY_DIR}/f77blas.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
if(NOT NO_CBLAS)
|
||||
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
|
||||
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
file(WRITE ${CMAKE_BINARY_DIR}/cblas.tmp "${CBLAS_H_CONTENTS_NEW}")
|
||||
install (FILES ${CMAKE_BINARY_DIR}/cblas.tmp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} RENAME cblas.h)
|
||||
ADD_CUSTOM_TARGET(gencblas
|
||||
ALL
|
||||
COMMAND ${SED} 's/common/openblas_config/g' ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h > "${CMAKE_BINARY_DIR}/cblas.tmp"
|
||||
COMMAND cp "${CMAKE_BINARY_DIR}/cblas.tmp" "${CMAKE_BINARY_DIR}/cblas.h"
|
||||
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h
|
||||
)
|
||||
add_dependencies(gencblas ${OpenBLAS_LIBNAME})
|
||||
|
||||
install (FILES ${CMAKE_BINARY_DIR}/cblas.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
endif()
|
||||
|
||||
if(NOT NO_LAPACKE)
|
||||
@@ -338,35 +275,19 @@ if(NOT NO_LAPACKE)
|
||||
add_dependencies( ${OpenBLAS_LIBNAME} genlapacke)
|
||||
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/*.h")
|
||||
install (FILES ${INCLUDE_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
|
||||
ADD_CUSTOM_TARGET(genlapacke
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
|
||||
COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
|
||||
)
|
||||
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
|
||||
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
install (TARGETS ${OpenBLAS_LIBNAME}_static DESTINATION ${CMAKE_INSTALL_LIBDIR})
|
||||
endif()
|
||||
|
||||
include(FindPkgConfig QUIET)
|
||||
if(PKG_CONFIG_FOUND)
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
|
||||
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas.pc @ONLY)
|
||||
install (FILES ${PROJECT_BINARY_DIR}/openblas.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
|
||||
endif()
|
||||
|
||||
|
||||
# GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".
|
||||
set(PN OpenBLAS)
|
||||
set(CMAKECONFIG_INSTALL_DIR "share/cmake/${PN}${SUFFIX64}")
|
||||
configure_package_config_file(cmake/${PN}Config.cmake.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake"
|
||||
INSTALL_DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
|
||||
VERSION ${${PN}_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion)
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake
|
||||
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
|
||||
RENAME ${PN}${SUFFIX64}ConfigVersion.cmake
|
||||
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
install(EXPORT "${PN}${SUFFIX64}Targets"
|
||||
NAMESPACE "${PN}${SUFFIX64}::"
|
||||
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
|
||||
|
||||
@@ -166,5 +166,5 @@ In chronological order:
|
||||
* [2017-01-01] dgemm and dtrmm kernels for IBM z13
|
||||
* [2017-02-26] ztrmm kernel for IBM z13
|
||||
* [2017-03-13] strmm and ctrmm kernel for IBM z13
|
||||
* [2017-09-01] initial Blas Level-1,2 (double precision) for IBM z13
|
||||
|
||||
|
||||
|
||||
243
Changelog.txt
243
Changelog.txt
@@ -1,247 +1,4 @@
|
||||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.3.5
|
||||
31-Dec-2018
|
||||
|
||||
common:
|
||||
* loop unrolling in TRMV has been enabled again.
|
||||
* A domain error in the thread workload distribution for SYRK
|
||||
has been fixed.
|
||||
* gmake builds will now automatically add -fPIC to the build
|
||||
options if the platform requires it.
|
||||
* a pthreads key leakage (and associated crash on dlclose) in
|
||||
the USE_TLS codepath was fixed.
|
||||
* building of the utest cases on systems that do not provide
|
||||
an implementation of complex.h was fixed.
|
||||
|
||||
x86_64:
|
||||
* the SkylakeX code was changed to compile on OSX.
|
||||
* unwanted application of the -march=skylake-avx512 option
|
||||
to the common code parts of a DYNAMIC_ARCH build was fixed.
|
||||
* improved performance of SGEMM for small workloads on Skylake X.
|
||||
* performance of SGEMM and DGEMM was improved on Haswell.
|
||||
|
||||
ARMV8:
|
||||
* a configuration error that broke the CNRM2 kernel was corrected.
|
||||
* compilation of the GEMM kernels with CMAKE was fixed.
|
||||
* DYNAMIC_ARCH builds are now available with CMAKE as well.
|
||||
* using CMAKE for cross-compilation to the new cpu TARGETs
|
||||
introduced in 0.3.4 now works.
|
||||
|
||||
POWER:
|
||||
* a problem in cpu autodetection for AIX has been corrected.
|
||||
|
||||
====================================================================
|
||||
Version 0.3.4
|
||||
02-Dec-2018
|
||||
|
||||
common:
|
||||
* the new, experimental thread-local memory allocation had
|
||||
inadvertently been left enabled for gmake builds in 0.3.3
|
||||
despite the announcement. It is now disabled by default, and
|
||||
single-threaded builds will keep using the old allocator even
|
||||
if the USE_TLS option is turned on.
|
||||
* OpenBLAS will now provide enough buffer space for at least 50
|
||||
threads by default.
|
||||
* The output of openblas_get_config() now contains the version
|
||||
number.
|
||||
* A serious thread safety bug in GEMV operation with small M and
|
||||
large N size has been fixed.
|
||||
* The code will now automatically call blas_thread_init after a
|
||||
fork if needed before handling a call to openblas_set_num_threads
|
||||
* Accesses to parallelized level3 functions from multiple callers
|
||||
are now serialized to avoid thread races (unless using OpenMP).
|
||||
This should provide better performance than the known-threadsafe
|
||||
(but non-default) USE_SIMPLE_THREADED_LEVEL3 option.
|
||||
* When building LAPACK with gfortran, -frecursive is now (again)
|
||||
enabled by default to ensure correct behaviour.
|
||||
* The OpenBLAS version cblas.h now supports both CBLAS_ORDER and
|
||||
CBLAS_LAYOUT as the name of the matrix row/column order option.
|
||||
* Externally set LDFLAGS are now passed through to the final compile/link
|
||||
steps to facilitate setting platform-specific linker flags.
|
||||
* A potential race condition during the build of LAPACK (that would
|
||||
usually manifest itself as a failure to build TESTING/MATGEN) has been
|
||||
fixed.
|
||||
* xHEMV has been changed to stay single-threaded for small input sizes
|
||||
where the overhead of multithreading exceeds any possible gains
|
||||
* CSWAP and ZSWAP have been limited to a single thread except on ARMV8 or
|
||||
ThunderX hardware with sizable input.
|
||||
* Linker flags for the PGI compiler have been updated
|
||||
* Behaviour of AXPY with zero increments is now handled in the C interface,
|
||||
correcting the result on at least Intel Atom.
|
||||
* The result matrix from calling SGELSS with an all-zero input matrix is
|
||||
now zeroed completely.
|
||||
|
||||
x86_64:
|
||||
* Autodetection of AMD Ryzen2 has been fixed (again).
|
||||
* CMAKE builds now support labeling of an INTERFACE64=1 build of
|
||||
the library with the _64 suffix.
|
||||
* AVX512 version of DGEMM has been added and the AVX512 SGEMM kernel
|
||||
has been sped up by rewriting with C intrinsics
|
||||
* Fixed compilation on RHEL5/CENTOS5 (issue with typename __WAIT_STATUS)
|
||||
|
||||
POWER:
|
||||
* added support for building on AIX (with gcc and GNU tools from AIX Toolbox).
|
||||
* CPU type detection has been implemented for AIX.
|
||||
* CPU type detection has been fixed for NETBSD.
|
||||
|
||||
MIPS64:
|
||||
* AXPY on LOONGSON3A has been corrected to pass "zero increment" utest.
|
||||
* DSDOT on LOONGSON3A has been fixed.
|
||||
* the SGEMM microkernel has been hardened against potential data loss.
|
||||
|
||||
ARMV8:
|
||||
* DYNAMIC_ARCH support is now available for 64bit ARM
|
||||
* cross-compiling for ARMV8 under iOS now works.
|
||||
* cpu-specific code has been rearranged to make better use of both
|
||||
hardware commonalities and model-specific compiler optimizations.
|
||||
* XGENE1 has been removed as a TARGET, superseded by the improved generic
|
||||
ARMV8 support.
|
||||
|
||||
ARMV7:
|
||||
* Older assembly mnemonics have been converted to UAL form to allow
|
||||
building with clang 7.0
|
||||
* Cross compiling LAPACKE for Android has been fixed again (broken by
|
||||
update to LAPACK 3.7.0 some while ago).
|
||||
|
||||
====================================================================
|
||||
Version 0.3.3
|
||||
31-Aug-2018
|
||||
|
||||
common:
|
||||
* thread memory allocation has been switched back to the method
|
||||
used before version 0.3.1 due to unexpected problems caused by
|
||||
the new code under some circumstances. A new compile-time option
|
||||
USE_TLS has been added to enable the new code, and it is hoped
|
||||
that this can become the default again in the next version.
|
||||
* LAPACK PR272 has been integrated, which fixes spurious errors
|
||||
in DSYEVR and related functions caused by missing conversion
|
||||
from ILAENV to ILAENV_2STAGE in several _2stage routines.
|
||||
* the cmake-generated OpenBLASConfig.cmake now uses correct case
|
||||
for the name of the library
|
||||
* added support for Haiku OS
|
||||
|
||||
x86_64:
|
||||
* added AVX512 implementations of SDOT, DDOT, SAXPY, DAXPY,
|
||||
DSCAL, DGEMVN and DSYMVL
|
||||
* added a workaround for a cygwin issue that prevented compilation
|
||||
of AVX512 code
|
||||
|
||||
IBM Z:
|
||||
* added autodetection of Z14
|
||||
* fixed TRMM errors in the generic target
|
||||
|
||||
====================================================================
|
||||
Version 0.3.2
|
||||
30-Jul-2018
|
||||
|
||||
common:
|
||||
* fixes for regressions caused by the rewrite of the thread
|
||||
initialization code in 0.3.1
|
||||
|
||||
POWER:
|
||||
* fixed cpu autodetection for the BSDs
|
||||
|
||||
MIPS64:
|
||||
* fixed utest errors in AXPY, DSDOT, ROT and SWAP
|
||||
|
||||
x86_64:
|
||||
* added autodetection of AMD Ryzen 2
|
||||
* fixed build with older versions of MSVC
|
||||
|
||||
====================================================================
|
||||
Version 0.3.1
|
||||
01-Jul-2018
|
||||
|
||||
common:
|
||||
* rewritten thread initialization code with significantly reduced overhead
|
||||
* added CBLAS interfaces to the IxAMIN BLAS extension functions
|
||||
* fixed the lapack-test target
|
||||
* CMAKE builds now create an OpenBLASConfig.cmake file
|
||||
* ZAXPY now uses a single thread for small input sizes
|
||||
* the LAPACK code was updated from Reference-LAPACK/lapack#253
|
||||
(fixing LAPACKE interfaces to Aasen's functions)
|
||||
|
||||
POWER:
|
||||
* corrected CROT and ZROT behaviour with zero INC_X
|
||||
|
||||
ARMV7:
|
||||
* corrected xDOT behaviour with zero INC_X or INC_Y
|
||||
|
||||
x86_64:
|
||||
* retired some older targets of DYNAMIC_ARCH builds to a new option DYNAMIC_OLDER,
|
||||
this affects PENRYN,DUNNINGTON,OPTERON,OPTERON_SSE3,BOBCAT,ATOM and NANO
|
||||
(which will still be supported via the slower PRESCOTT kernels when this option is not set)
|
||||
* added an option DYNAMIC_LIST that (used in conjunction with DYNAMIC_ARCH) allows to
|
||||
specify the list of x86_64 targets to include. Any target not on the list will be supported
|
||||
by the Sandybridge or Nehalem kernels if available, or by Prescott.
|
||||
* improved SWITCH_RATIO on Haswell for increased GEMM throughput
|
||||
* added initial support for Intel Skylake X, including an AVX512 SGEMM kernel
|
||||
* added autodetection of Intel Cannon Lake series as Skylake X
|
||||
* added a default L2 cache size for hypervisors that return zero here (Chromebook)
|
||||
* fixed a name clash with recent Windows10 headers that broke the build with (at least)
|
||||
recent mingw from MSYS2
|
||||
* fixed a link error in mixed clang/gfortran builds with OpenMP
|
||||
* updated the OSX deployment target to 10.8
|
||||
* switched on parallel make for builds on MS Windows by default
|
||||
|
||||
x86:
|
||||
* fixed SSWAP and DSWAP behaviour with zero INC_X and INC_Y
|
||||
|
||||
====================================================================
|
||||
Version 0.3.0
|
||||
23-May-2018
|
||||
|
||||
common:
|
||||
* fixed some more thread race and locking bugs
|
||||
* added preliminary support for calling an OpenMP build of the library from multiple threads
|
||||
* removed performance impact of thread locks added in 0.2.20 on OpenMP code
|
||||
* general code cleanup
|
||||
* optimized DSDOT implementation
|
||||
* improved thread distribution for GEMM
|
||||
* corrected IMATCOPY/OMATCOPY implementation
|
||||
* fixed out-of-bounds accesses in the multithreaded xBMV/xPMV and SYMV implementations
|
||||
* cmake build improvements
|
||||
* pkgconfig file now contains build options
|
||||
* openblas_get_config() now reports USE_OPENMP and NUM_THREADS settings used for the build
|
||||
* corrections and improvements for systems with more than 64 cpus
|
||||
* LAPACK code updated to 3.8.0 including later fixes
|
||||
* added ReLAPACK, a recursive implementation of several LAPACK functions
|
||||
* Rewrote ROTMG to handle cases that the netlib code failed to address
|
||||
* Disabled (broken) multithreading code for xTRMV
|
||||
* corrected prototypes of complex CBLAS functions to make our cblas.h match the generally accepted standard
|
||||
* shared memory access failures on startup are now handled more gracefully
|
||||
* restored utests from earlier releases (and made them pass on all affected systems)
|
||||
|
||||
SPARC:
|
||||
* several fixes for cpu autodetection
|
||||
|
||||
POWER:
|
||||
* corrected vector register overwriting in several Power8 kernels
|
||||
* optimized additional BLAS functions
|
||||
|
||||
ARM:
|
||||
* added support for CortexA53 and A72
|
||||
* added autodetection for ThunderX2T99
|
||||
* made most optimized kernels the default for generic ARMv8 targets
|
||||
|
||||
x86_64:
|
||||
* parallelized DDOT kernel for Haswell
|
||||
* changed alignment directives in assembly kernels to boost performance on OSX
|
||||
* fixed register handling in the GEMV microkernels (bug exposed by gcc7)
|
||||
* added support for building on OpenBSD and Dragonfly
|
||||
* updated compiler options to work with Intel release 2018
|
||||
* support fully optimized build with clang/flang on Microsoft Windows
|
||||
* fixed building on AIX
|
||||
|
||||
IBM Z:
|
||||
* added optimized BLAS 1/2 functions
|
||||
|
||||
MIPS:
|
||||
* fixed cpu autodetection helper code
|
||||
* added mips32 1004K cpu (Mediatek MT7621 and similar SoC)
|
||||
* added mips64 I6500 cpu
|
||||
|
||||
====================================================================
|
||||
Version 0.2.20
|
||||
24-Jul-2017
|
||||
|
||||
54
Makefile
54
Makefile
@@ -21,17 +21,6 @@ ifeq ($(BUILD_RELAPACK), 1)
|
||||
RELA = re_lapack
|
||||
endif
|
||||
|
||||
ifeq ($(NO_FORTRAN), 1)
|
||||
define NOFORTRAN
|
||||
1
|
||||
endef
|
||||
define NO_LAPACK
|
||||
1
|
||||
endef
|
||||
export NOFORTRAN
|
||||
export NO_LAPACK
|
||||
endif
|
||||
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
|
||||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
@@ -58,7 +47,7 @@ endif
|
||||
endif
|
||||
|
||||
@echo " C compiler ... $(C_COMPILER) (command line : $(CC))"
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
@echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))"
|
||||
endif
|
||||
ifneq ($(OSNAME), AIX)
|
||||
@@ -97,12 +86,16 @@ endif
|
||||
|
||||
shared :
|
||||
ifndef NO_SHARED
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android))
|
||||
@$(MAKE) -C exports so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
@$(MAKE) -C exports so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
@$(MAKE) -C exports so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
@@ -119,7 +112,7 @@ endif
|
||||
endif
|
||||
|
||||
tests :
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
touch $(LIBNAME)
|
||||
ifndef NO_FBLAS
|
||||
$(MAKE) -C test all
|
||||
@@ -131,7 +124,7 @@ endif
|
||||
endif
|
||||
|
||||
libs :
|
||||
ifeq ($(CORE), UNKNOWN)
|
||||
ifeq ($(CORE), UNKOWN)
|
||||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
@@ -164,9 +157,6 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
||||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
|
||||
done
|
||||
@echo DYNAMIC_ARCH=1 >> Makefile.conf_last
|
||||
ifeq ($(DYNAMIC_OLDER), 1)
|
||||
@echo DYNAMIC_OLDER=1 >> Makefile.conf_last
|
||||
endif
|
||||
endif
|
||||
ifdef USE_THREAD
|
||||
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
|
||||
@@ -221,7 +211,7 @@ netlib :
|
||||
|
||||
else
|
||||
netlib : lapack_prebuild
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
endif
|
||||
@@ -242,7 +232,7 @@ prof_lapack : lapack_prebuild
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof
|
||||
|
||||
lapack_prebuild :
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@@ -251,8 +241,8 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "override ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@@ -267,8 +257,6 @@ ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifdef SMP
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else ifeq ($(OSNAME), Haiku)
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
@@ -282,26 +270,25 @@ endif
|
||||
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
|
||||
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
|
||||
large.tgz :
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
if [ ! -a $< ]; then
|
||||
-wget http://www.netlib.org/lapack/timing/large.tgz;
|
||||
fi
|
||||
endif
|
||||
|
||||
timing.tgz :
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
if [ ! -a $< ]; then
|
||||
-wget http://www.netlib.org/lapack/timing/timing.tgz;
|
||||
fi
|
||||
endif
|
||||
|
||||
lapack-timing : large.tgz timing.tgz
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifndef NOFORTRAN
|
||||
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
|
||||
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||
@@ -310,10 +297,9 @@ endif
|
||||
|
||||
lapack-test :
|
||||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
ifneq ($(CROSS), 1)
|
||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \
|
||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||
endif
|
||||
@@ -325,9 +311,9 @@ lapack-runtest:
|
||||
|
||||
|
||||
blas-test:
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out)
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out)
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)
|
||||
|
||||
|
||||
dummy :
|
||||
|
||||
@@ -4,37 +4,22 @@ CCOMMON_OPT += -march=armv8-a
|
||||
FCOMMON_OPT += -march=armv8-a
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA53)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA57)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
|
||||
CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
|
||||
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA72)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA73)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
||||
ifeq ($(CORE), VULCAN)
|
||||
CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
|
||||
FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), FALKOR)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=falkor
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=falkor
|
||||
CCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
|
||||
FCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX2T99)
|
||||
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
CCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99
|
||||
FCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99
|
||||
endif
|
||||
|
||||
@@ -48,7 +48,6 @@ ifndef NO_CBLAS
|
||||
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
|
||||
endif
|
||||
|
||||
ifneq ($(OSNAME), AIX)
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
@@ -67,14 +66,18 @@ endif
|
||||
#for install shared library
|
||||
ifndef NO_SHARED
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android))
|
||||
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@@ -87,50 +90,21 @@ ifeq ($(OSNAME), Darwin)
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||
@-cp $(IMPLIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@-cp $(LIBDLLNAME).a "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||
@-cp $(IMPLIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
endif
|
||||
endif
|
||||
|
||||
else
|
||||
#install on AIX has different options syntax
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
ifndef NO_STATIC
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
endif
|
||||
#for install shared library
|
||||
ifndef NO_SHARED
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
#Generating openblas.pc
|
||||
@echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
||||
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'extralib='$(EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo Generating openblas.pc in $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)
|
||||
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||
@echo 'version='$(VERSION) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||
@echo 'extralib='$(EXTRALIB) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||
@cat openblas.pc.in >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
|
||||
|
||||
|
||||
#Generating OpenBLASConfig.cmake
|
||||
@@ -140,7 +114,7 @@ endif
|
||||
|
||||
ifndef NO_SHARED
|
||||
#ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
endif
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
|
||||
|
||||
@@ -17,10 +17,6 @@ ifdef CPUIDEMU
|
||||
EXFLAGS = -DCPUIDEMU -DVENDOR=99
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), 1004K)
|
||||
TARGET_FLAGS = -mips32r2
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), P5600)
|
||||
TARGET_FLAGS = -mips32r5
|
||||
endif
|
||||
@@ -33,10 +29,6 @@ ifeq ($(TARGET), P6600)
|
||||
TARGET_FLAGS = -mips64r6
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), I6500)
|
||||
TARGET_FLAGS = -mips64r6
|
||||
endif
|
||||
|
||||
all: getarch_2nd
|
||||
./getarch_2nd 0 >> $(TARGET_MAKE)
|
||||
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.3.5
|
||||
VERSION = 0.2.20
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
@@ -17,11 +17,6 @@ VERSION = 0.3.5
|
||||
# If you want to support multiple architecture in one binary
|
||||
# DYNAMIC_ARCH = 1
|
||||
|
||||
# If you want the full list of x86_64 architectures supported in DYNAMIC_ARCH
|
||||
# mode (including individual optimized codes for PENRYN, DUNNINGTON, OPTERON,
|
||||
# OPTERON_SSE3, ATOM and NANO rather than fallbacks to older architectures)
|
||||
# DYNAMIC_OLDER = 1
|
||||
|
||||
# C compiler including binary type(32bit / 64bit). Default is gcc.
|
||||
# Don't use Intel Compiler or PGI, it won't generate right codes as I expect.
|
||||
# CC = gcc
|
||||
@@ -60,26 +55,11 @@ VERSION = 0.3.5
|
||||
# This flag is always set for POWER8. Don't modify the flag
|
||||
# USE_OPENMP = 1
|
||||
|
||||
# The OpenMP scheduler to use - by default this is "static" and you
|
||||
# will normally not want to change this unless you know that your main
|
||||
# workload will involve tasks that have highly unbalanced running times
|
||||
# for individual threads. Changing away from "static" may also adversely
|
||||
# affect memory access locality in NUMA systems. Setting to "runtime" will
|
||||
# allow you to select the scheduler from the environment variable OMP_SCHEDULE
|
||||
# CCOMMON_OPT += -DOMP_SCHED=dynamic
|
||||
|
||||
# You can define maximum number of threads. Basically it should be
|
||||
# less than actual number of cores. If you don't specify one, it's
|
||||
# automatically detected by the script.
|
||||
# NUM_THREADS = 24
|
||||
|
||||
# If you have enabled USE_OPENMP and your application would call
|
||||
# OpenBLAS's calculation API from multi threads, please comment it in.
|
||||
# This flag defines how many instances of OpenBLAS's calculation API can
|
||||
# actually run in parallel. If more threads call OpenBLAS's calculation API,
|
||||
# they need to wait for the preceding API calls to finish or risk data corruption.
|
||||
# NUM_PARALLEL = 2
|
||||
|
||||
# if you don't need to install the static library, please comment it in.
|
||||
# NO_STATIC = 1
|
||||
|
||||
@@ -109,12 +89,6 @@ BUILD_LAPACK_DEPRECATED = 1
|
||||
# If you want to use legacy threaded Level 3 implementation.
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
|
||||
# If you want to use the new, still somewhat experimental code that uses
|
||||
# thread-local storage instead of a central memory buffer in memory.c
|
||||
# Note that if your system uses GLIBC, it needs to have at least glibc 2.21
|
||||
# for this to work.
|
||||
# USE_TLS = 1
|
||||
|
||||
# If you want to drive whole 64bit region by BLAS. Not all Fortran
|
||||
# compiler supports this. It's safe to keep comment it out if you
|
||||
# are not sure(equivalent to "-i8" option).
|
||||
@@ -126,7 +100,7 @@ BUILD_LAPACK_DEPRECATED = 1
|
||||
NO_WARMUP = 1
|
||||
|
||||
# If you want to disable CPU/Memory affinity on Linux.
|
||||
NO_AFFINITY = 1
|
||||
#NO_AFFINITY = 1
|
||||
|
||||
# if you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
|
||||
# BIGNUMA = 1
|
||||
@@ -152,9 +126,6 @@ NO_AFFINITY = 1
|
||||
# FUNCTION_PROFILE = 1
|
||||
|
||||
# Support for IEEE quad precision(it's *real* REAL*16)( under testing)
|
||||
# This option should not be used - it is a holdover from unfinished code present
|
||||
# in the original GotoBLAS2 library that may be usable as a starting point but
|
||||
# is not even expected to compile in its present form.
|
||||
# QUAD_PRECISION = 1
|
||||
|
||||
# Threads are still working for a while after finishing BLAS operation
|
||||
@@ -173,11 +144,8 @@ NO_AFFINITY = 1
|
||||
# CONSISTENT_FPCSR = 1
|
||||
|
||||
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
|
||||
# with single thread. (Actually in recent versions this is a factor proportional to the
|
||||
# number of floating point operations necessary for the given problem size, no longer
|
||||
# an individual dimension). You can use this setting to avoid the overhead of multi-
|
||||
# threading in small matrix sizes. The default value is 4, but values as high as 50 have
|
||||
# been reported to be optimal for certain workloads (50 is the recommended value for Julia).
|
||||
# with single thread. You can use this flag to avoid the overhead of multi-threading
|
||||
# in small matrix sizes. The default value is 4.
|
||||
# GEMM_MULTITHREAD_THRESHOLD = 4
|
||||
|
||||
# If you need a sanity check by comparing reference BLAS. It'll be very
|
||||
@@ -192,8 +160,8 @@ NO_AFFINITY = 1
|
||||
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT
|
||||
# COMMON_OPT = -O2
|
||||
|
||||
# gfortran option for LAPACK to improve thread-safety
|
||||
# It is enabled by default in Makefile.system for gfortran
|
||||
# gfortran option for LAPACK
|
||||
# enable this flag only on 64bit Linux and if you need a thread safe lapack library
|
||||
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT
|
||||
# FCOMMON_OPT = -frecursive
|
||||
|
||||
|
||||
157
Makefile.system
157
Makefile.system
@@ -9,17 +9,6 @@ ifndef TOPDIR
|
||||
TOPDIR = .
|
||||
endif
|
||||
|
||||
# Catch conflicting usage of ARCH in some BSD environments
|
||||
ifeq ($(ARCH), amd64)
|
||||
override ARCH=x86_64
|
||||
else ifeq ($(ARCH), powerpc64)
|
||||
override ARCH=power
|
||||
else ifeq ($(ARCH), i386)
|
||||
override ARCH=x86
|
||||
else ifeq ($(ARCH), aarch64)
|
||||
override ARCH=arm64
|
||||
endif
|
||||
|
||||
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
|
||||
|
||||
# Default C compiler
|
||||
@@ -28,24 +17,15 @@ NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
|
||||
# http://stackoverflow.com/questions/4029274/mingw-and-make-variables
|
||||
# - Default value is 'cc' which is not always a valid command (e.g. MinGW).
|
||||
ifeq ($(origin CC),default)
|
||||
|
||||
# Check if $(CC) refers to a valid command and set the value to gcc if not
|
||||
ifneq ($(findstring cmd.exe,$(SHELL)),)
|
||||
ifeq ($(shell where $(CC) 2>NUL),)
|
||||
CC = gcc
|
||||
# Change the default compiler to clang on Mac OSX.
|
||||
# http://stackoverflow.com/questions/714100/os-detecting-makefile
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CC = clang
|
||||
# EXTRALIB += -Wl,-no_compact_unwind
|
||||
endif
|
||||
endif
|
||||
else # POSIX-ish
|
||||
ifeq ($(shell command -v $(CC) 2>/dev/null),)
|
||||
ifeq ($(shell uname -s),Darwin)
|
||||
CC = clang
|
||||
# EXTRALIB += -Wl,-no_compact_unwind
|
||||
else
|
||||
CC = gcc
|
||||
endif # Darwin
|
||||
endif # CC exists
|
||||
endif # Shell is sane
|
||||
|
||||
endif # CC is set to default
|
||||
|
||||
# Default Fortran compiler (FC) is selected by f_check.
|
||||
|
||||
@@ -73,9 +53,6 @@ ifeq ($(BINARY), 32)
|
||||
ifeq ($(TARGET), HASWELL)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), SKYLAKEX)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
@@ -109,9 +86,6 @@ ifeq ($(BINARY), 32)
|
||||
ifeq ($(TARGET_CORE), HASWELL)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), SKYLAKEX)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
@@ -158,10 +132,6 @@ ifeq ($(NO_AVX2), 1)
|
||||
GETARCH_FLAGS += -DNO_AVX2
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX512), 1)
|
||||
GETARCH_FLAGS += -DNO_AVX512
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
GETARCH_FLAGS += -g
|
||||
endif
|
||||
@@ -205,17 +175,12 @@ endif
|
||||
|
||||
endif
|
||||
|
||||
ifndef NUM_PARALLEL
|
||||
NUM_PARALLEL = 1
|
||||
endif
|
||||
|
||||
ifndef NUM_THREADS
|
||||
NUM_THREADS = $(NUM_CORES)
|
||||
endif
|
||||
|
||||
ifeq ($(NUM_THREADS), 1)
|
||||
override USE_THREAD = 0
|
||||
override USE_OPENMP = 0
|
||||
endif
|
||||
|
||||
ifdef USE_THREAD
|
||||
@@ -259,12 +224,12 @@ endif
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
ifndef MACOSX_DEPLOYMENT_TARGET
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.8
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.6
|
||||
endif
|
||||
MD5SUM = md5 -r
|
||||
endif
|
||||
|
||||
ifneq (,$(findstring $(OSNAME), FreeBSD OpenBSD DragonFly))
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
MD5SUM = md5 -r
|
||||
endif
|
||||
|
||||
@@ -338,7 +303,6 @@ endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
NEED_PIC = 0
|
||||
NO_EXPRECISION = 1
|
||||
OS_CYGWIN_NT = 1
|
||||
endif
|
||||
|
||||
ifneq ($(OSNAME), WINNT)
|
||||
@@ -458,7 +422,7 @@ CCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), INTEL)
|
||||
CCOMMON_OPT += -fopenmp
|
||||
CCOMMON_OPT += -openmp
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), PGI)
|
||||
@@ -483,44 +447,13 @@ DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
DYNAMIC_CORE = PRESCOTT CORE2
|
||||
ifeq ($(DYNAMIC_OLDER), 1)
|
||||
DYNAMIC_CORE += PENRYN DUNNINGTON
|
||||
endif
|
||||
DYNAMIC_CORE += NEHALEM
|
||||
ifeq ($(DYNAMIC_OLDER), 1)
|
||||
DYNAMIC_CORE += OPTERON OPTERON_SSE3
|
||||
endif
|
||||
DYNAMIC_CORE += BARCELONA
|
||||
ifeq ($(DYNAMIC_OLDER), 1)
|
||||
DYNAMIC_CORE += BOBCAT ATOM NANO
|
||||
endif
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
|
||||
endif
|
||||
ifneq ($(NO_AVX2), 1)
|
||||
DYNAMIC_CORE += HASWELL ZEN
|
||||
endif
|
||||
ifneq ($(NO_AVX512), 1)
|
||||
ifneq ($(NO_AVX2), 1)
|
||||
DYNAMIC_CORE += SKYLAKEX
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef DYNAMIC_LIST
|
||||
override DYNAMIC_CORE = PRESCOTT $(DYNAMIC_LIST)
|
||||
XCCOMMON_OPT = -DDYNAMIC_LIST -DDYN_PRESCOTT
|
||||
XCCOMMON_OPT += $(foreach dcore,$(DYNAMIC_LIST),-DDYN_$(dcore))
|
||||
CCOMMON_OPT += $(XCCOMMON_OPT)
|
||||
#CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)'
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), arm64)
|
||||
DYNAMIC_CORE = ARMV8
|
||||
DYNAMIC_CORE += CORTEXA57
|
||||
DYNAMIC_CORE += THUNDERX
|
||||
DYNAMIC_CORE += THUNDERX2T99
|
||||
endif
|
||||
|
||||
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
|
||||
@@ -620,14 +553,9 @@ CCOMMON_OPT += -march=mips64
|
||||
FCOMMON_OPT += -march=mips64
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), 1004K)
|
||||
CCOMMON_OPT += -mips32r2 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips32r2 $(MSA_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), P5600)
|
||||
CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
|
||||
CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), I6400)
|
||||
@@ -640,11 +568,6 @@ CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), I6500)
|
||||
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
|
||||
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
BINARY_DEFINED = 1
|
||||
endif
|
||||
@@ -652,14 +575,12 @@ endif
|
||||
endif
|
||||
|
||||
ifndef BINARY_DEFINED
|
||||
ifneq ($(OSNAME), AIX)
|
||||
ifdef BINARY64
|
||||
CCOMMON_OPT += -m64
|
||||
else
|
||||
CCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
@@ -704,7 +625,6 @@ ifeq ($(F_COMPILER), G77)
|
||||
CCOMMON_OPT += -DF_INTERFACE_G77
|
||||
FCOMMON_OPT += -Wall
|
||||
ifndef NO_BINARY_MODE
|
||||
ifneq ($(OSNAME), AIX)
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -m64
|
||||
else
|
||||
@@ -712,12 +632,10 @@ FCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), G95)
|
||||
CCOMMON_OPT += -DF_INTERFACE_G95
|
||||
FCOMMON_OPT += -Wall
|
||||
ifneq ($(OSNAME), AIX)
|
||||
ifndef NO_BINARY_MODE
|
||||
ifdef BINARY64
|
||||
FCOMMON_OPT += -m64
|
||||
@@ -726,13 +644,10 @@ FCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
CCOMMON_OPT += -DF_INTERFACE_GFORT
|
||||
FCOMMON_OPT += -Wall
|
||||
# make single-threaded LAPACK calls thread-safe #1847
|
||||
FCOMMON_OPT += -frecursive
|
||||
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
EXTRALIB += -lgfortran
|
||||
@@ -749,20 +664,16 @@ FCOMMON_OPT += -mabi=32
|
||||
endif
|
||||
else
|
||||
ifdef BINARY64
|
||||
ifneq ($(OSNAME), AIX)
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
ifdef INTERFACE64
|
||||
ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -fdefault-integer-8
|
||||
endif
|
||||
endif
|
||||
else
|
||||
ifneq ($(OSNAME), AIX)
|
||||
FCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
@@ -776,7 +687,7 @@ FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -fopenmp
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -956,10 +867,6 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
||||
CCOMMON_OPT += -DDYNAMIC_ARCH
|
||||
endif
|
||||
|
||||
ifeq ($(DYNAMIC_OLDER), 1)
|
||||
CCOMMON_OPT += -DDYNAMIC_OLDER
|
||||
endif
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
CCOMMON_OPT += -DNO_LAPACK
|
||||
#Disable LAPACK C interface
|
||||
@@ -982,10 +889,6 @@ ifeq ($(NO_AVX2), 1)
|
||||
CCOMMON_OPT += -DNO_AVX2
|
||||
endif
|
||||
|
||||
ifeq ($(NO_AVX512), 1)
|
||||
CCOMMON_OPT += -DNO_AVX512
|
||||
endif
|
||||
|
||||
ifdef SMP
|
||||
CCOMMON_OPT += -DSMP_SERVER
|
||||
|
||||
@@ -1032,18 +935,10 @@ endif
|
||||
|
||||
CCOMMON_OPT += -DMAX_CPU_NUMBER=$(NUM_THREADS)
|
||||
|
||||
CCOMMON_OPT += -DMAX_PARALLEL_NUMBER=$(NUM_PARALLEL)
|
||||
|
||||
ifdef USE_SIMPLE_THREADED_LEVEL3
|
||||
CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3
|
||||
endif
|
||||
|
||||
ifdef USE_TLS
|
||||
CCOMMON_OPT += -DUSE_TLS
|
||||
endif
|
||||
|
||||
CCOMMON_OPT += -DVERSION=\"$(VERSION)\"
|
||||
|
||||
ifndef SYMBOLPREFIX
|
||||
SYMBOLPREFIX =
|
||||
endif
|
||||
@@ -1053,15 +948,9 @@ SYMBOLSUFFIX =
|
||||
endif
|
||||
|
||||
ifndef LIBNAMESUFFIX
|
||||
LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
|
||||
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
|
||||
else
|
||||
LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
LIBPREFIX = cyg$(LIBNAMEBASE)
|
||||
else
|
||||
LIBPREFIX = lib$(LIBNAMEBASE)
|
||||
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
|
||||
endif
|
||||
|
||||
KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
|
||||
@@ -1154,6 +1043,8 @@ ifndef FCOMMON_OPT
|
||||
FCOMMON_OPT = -O2 -frecursive
|
||||
endif
|
||||
|
||||
|
||||
|
||||
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||
|
||||
@@ -1161,12 +1052,6 @@ override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
|
||||
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
#MAKEOVERRIDES =
|
||||
|
||||
ifdef NEED_PIC
|
||||
ifeq (,$(findstring PIC,$(FFLAGS)))
|
||||
override FFLAGS += -fPIC
|
||||
endif
|
||||
endif
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
#Disable -fopenmp for LAPACK Fortran codes on Windows.
|
||||
ifdef OS_WINDOWS
|
||||
@@ -1224,12 +1109,7 @@ endif
|
||||
|
||||
|
||||
LIBDLLNAME = $(LIBPREFIX).dll
|
||||
IMPLIBNAME = lib$(LIBNAMEBASE).dll.a
|
||||
ifneq ($(OSNAME), AIX)
|
||||
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
|
||||
else
|
||||
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.a)
|
||||
endif
|
||||
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
|
||||
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
|
||||
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)
|
||||
@@ -1306,7 +1186,6 @@ export MSA_FLAGS
|
||||
export KERNELDIR
|
||||
export FUNCTION_PROFILE
|
||||
export TARGET_CORE
|
||||
export NO_AVX512
|
||||
|
||||
export SGEMM_UNROLL_M
|
||||
export SGEMM_UNROLL_N
|
||||
|
||||
@@ -8,34 +8,6 @@ endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), SKYLAKEX)
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifndef NO_AVX512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), HASWELL)
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifndef NO_AVX2
|
||||
CCOMMON_OPT += -mavx2
|
||||
FCOMMON_OPT += -mavx2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
ARFLAGS = -m x64
|
||||
endif
|
||||
|
||||
237
README.md
237
README.md
@@ -2,224 +2,175 @@
|
||||
|
||||
[](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
|
||||
Travis CI: [](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
Travis CI: [](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
|
||||
AppVeyor: [](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
|
||||
|
||||
## Introduction
|
||||
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
Please read the documentation on the OpenBLAS wiki pages: <http://github.com/xianyi/OpenBLAS/wiki>.
|
||||
Please read the documents on OpenBLAS wiki pages <http://github.com/xianyi/OpenBLAS/wiki>.
|
||||
|
||||
## Binary Packages
|
||||
|
||||
We provide official binary packages for the following platform:
|
||||
We provide binary packages for the following platform.
|
||||
|
||||
* Windows x86/x86_64
|
||||
|
||||
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/).
|
||||
|
||||
## Installation from Source
|
||||
Download from project homepage. http://xianyi.github.com/OpenBLAS/
|
||||
|
||||
Download from project homepage, http://xianyi.github.com/OpenBLAS/, or check out the code
|
||||
using Git from https://github.com/xianyi/OpenBLAS.git.
|
||||
|
||||
### Dependencies
|
||||
|
||||
Building OpenBLAS requires the following to be installed:
|
||||
|
||||
* GNU Make
|
||||
* A C compiler, e.g. GCC or Clang
|
||||
* A Fortran compiler (optional, for LAPACK)
|
||||
* IBM MASS (optional, see below)
|
||||
|
||||
Or, check out codes from git://github.com/xianyi/OpenBLAS.git
|
||||
### Normal compile
|
||||
|
||||
Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically.
|
||||
To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`.
|
||||
The full target list is in the file `TargetList.txt`.
|
||||
* type "make" to detect the CPU automatically.
|
||||
or
|
||||
* type "make TARGET=xxx" to set target CPU, e.g. "make TARGET=NEHALEM". The full target list is in file TargetList.txt.
|
||||
|
||||
### Cross compile
|
||||
|
||||
Set `CC` and `FC` to point to the cross toolchains, and set `HOSTCC` to your host C compiler.
|
||||
The target must be specified explicitly when cross compiling.
|
||||
Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly.
|
||||
|
||||
Examples:
|
||||
|
||||
* On an x86 box, compile this library for a loongson3a CPU:
|
||||
```sh
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
```
|
||||
On X86 box, compile this library for loongson3a CPU.
|
||||
|
||||
* On an x86 box, compile this library for a loongson3a CPU with loongcc (based on Open64) compiler:
|
||||
```sh
|
||||
make CC=loongcc FC=loongf95 HOSTCC=gcc TARGET=LOONGSON3A CROSS=1 CROSS_SUFFIX=mips64el-st-linux-gnu- NO_LAPACKE=1 NO_SHARED=1 BINARY=32
|
||||
```
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
|
||||
On X86 box, compile this library for loongson3a CPU with loongcc (based on Open64) compiler.
|
||||
|
||||
make CC=loongcc FC=loongf95 HOSTCC=gcc TARGET=LOONGSON3A CROSS=1 CROSS_SUFFIX=mips64el-st-linux-gnu- NO_LAPACKE=1 NO_SHARED=1 BINARY=32
|
||||
|
||||
### Debug version
|
||||
|
||||
A debug version can be built using `make DEBUG=1`.
|
||||
make DEBUG=1
|
||||
|
||||
### Compile with MASS support on Power CPU (optional)
|
||||
### Compile with MASS Support on Power CPU (Optional dependency)
|
||||
|
||||
The [IBM MASS](http://www-01.ibm.com/software/awdtools/mass/linux/mass-linux.html) library
|
||||
consists of a set of mathematical functions for C, C++, and Fortran applications that are
|
||||
tuned for optimum performance on POWER architectures.
|
||||
OpenBLAS with MASS requires a 64-bit, little-endian OS on POWER.
|
||||
The library can be installed as shown:
|
||||
[IBM MASS](http://www-01.ibm.com/software/awdtools/mass/linux/mass-linux.html) library consists of a set of mathematical functions for C, C++, and
|
||||
Fortran-language applications that are tuned for optimum performance on POWER architectures. OpenBLAS with MASS requires 64-bit, little-endian OS on POWER.
|
||||
The library can be installed as below -
|
||||
|
||||
* On Ubuntu:
|
||||
```sh
|
||||
wget -q http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add -
|
||||
echo "deb http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ trusty main" | sudo tee /etc/apt/sources.list.d/ibm-xl-compiler-eval.list
|
||||
sudo apt-get update
|
||||
sudo apt-get install libxlmass-devel.8.1.5
|
||||
```
|
||||
* On Ubuntu:
|
||||
|
||||
* On RHEL/CentOS:
|
||||
```sh
|
||||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/repodata/repomd.xml.key
|
||||
sudo rpm --import repomd.xml.key
|
||||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/ibm-xl-compiler-eval.repo
|
||||
sudo cp ibm-xl-compiler-eval.repo /etc/yum.repos.d/
|
||||
sudo yum install libxlmass-devel.8.1.5
|
||||
```
|
||||
wget -q http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add -</br>
|
||||
echo "deb http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ trusty main" | sudo tee /etc/apt/sources.list.d/ibm-xl-compiler-eval.list</br>
|
||||
sudo apt-get update</br>
|
||||
sudo apt-get install libxlmass-devel.8.1.5</br>
|
||||
|
||||
After installing the MASS library, compile OpenBLAS with `USE_MASS=1`.
|
||||
For example, to compile on Power8 with MASS support: `make USE_MASS=1 TARGET=POWER8`.
|
||||
* On RHEL/CentOS:
|
||||
|
||||
### Install to a specific directory (optional)
|
||||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/repodata/repomd.xml.key</br>
|
||||
sudo rpm --import repomd.xml.key</br>
|
||||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/ibm-xl-compiler-eval.repo</br>
|
||||
sudo cp ibm-xl-compiler-eval.repo /etc/yum.repos.d/</br>
|
||||
sudo yum install libxlmass-devel.8.1.5</br>
|
||||
|
||||
Use `PREFIX=` when invoking `make`, for example
|
||||
After installing MASS library, compile openblas with USE_MASS=1.
|
||||
|
||||
```sh
|
||||
make install PREFIX=your_installation_directory
|
||||
```
|
||||
Example:
|
||||
|
||||
The default installation directory is `/opt/OpenBLAS`.
|
||||
Compiling on Power8 with MASS support -
|
||||
|
||||
## Supported CPUs and Operating Systems
|
||||
make USE_MASS=1 TARGET=POWER8
|
||||
|
||||
Please read `GotoBLAS_01Readme.txt`.
|
||||
### Install to the directory (optional)
|
||||
|
||||
### Additional supported CPUs
|
||||
Example:
|
||||
|
||||
#### x86/x86-64
|
||||
make install PREFIX=your_installation_directory
|
||||
|
||||
The default directory is /opt/OpenBLAS
|
||||
|
||||
## Support CPU & OS
|
||||
Please read GotoBLAS_01Readme.txt
|
||||
|
||||
### Additional support CPU:
|
||||
|
||||
#### x86/x86-64:
|
||||
- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes.
|
||||
- **Intel Sandy Bridge**: Optimized Level-3 and Level-2 BLAS with AVX on x86-64.
|
||||
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64.
|
||||
- **Intel Skylake**: Optimized Level-3 and Level-2 BLAS with AVX512 and FMA on x86-64.
|
||||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
|
||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
|
||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
|
||||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
|
||||
|
||||
#### MIPS64
|
||||
|
||||
#### MIPS64:
|
||||
- **ICT Loongson 3A**: Optimized Level-3 BLAS and part of Level-1,2.
|
||||
- **ICT Loongson 3B**: Experimental
|
||||
|
||||
#### ARM
|
||||
#### ARM:
|
||||
- **ARMV6**: Optimized BLAS for vfpv2 and vfpv3-d16 ( e.g. BCM2835, Cortex M0+ )
|
||||
- **ARMV7**: Optimized BLAS for vfpv3-d32 ( e.g. Cortex A8, A9 and A15 )
|
||||
|
||||
- **ARMv6**: Optimized BLAS for vfpv2 and vfpv3-d16 (e.g. BCM2835, Cortex M0+)
|
||||
- **ARMv7**: Optimized BLAS for vfpv3-d32 (e.g. Cortex A8, A9 and A15)
|
||||
|
||||
#### ARM64
|
||||
|
||||
- **ARMv8**: Experimental
|
||||
#### ARM64:
|
||||
- **ARMV8**: Experimental
|
||||
- **ARM Cortex-A57**: Experimental
|
||||
|
||||
#### PPC/PPC64
|
||||
|
||||
- **POWER8**: Optimized Level-3 BLAS and some Level-1, only with `USE_OPENMP=1`
|
||||
|
||||
#### IBM zEnterprise System
|
||||
|
||||
- **Z13**: Optimized Level-3 BLAS and Level-1,2 (double precision)
|
||||
|
||||
### Supported OS
|
||||
#### IBM zEnterprise System:
|
||||
- **Z13**: Optimized Level-3 BLAS
|
||||
|
||||
|
||||
### Support OS:
|
||||
- **GNU/Linux**
|
||||
- **MinGW or Visual Studio (CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||
- **Darwin/macOS**: Experimental. Although GotoBLAS2 supports Darwin, we are not macOS experts.
|
||||
- **FreeBSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **OpenBSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **DragonFly BSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **Android**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
|
||||
- **MingWin or Visual Studio(CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are beginners on Mac OS X.
|
||||
- **FreeBSD**: Supported by community. We didn't test the library on this OS.
|
||||
- **Android**: Supported by community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
|
||||
|
||||
## Usage
|
||||
## Usages
|
||||
Link with libopenblas.a or -lopenblas for shared library.
|
||||
|
||||
Statically link with `libopenblas.a` or dynamically link with `-lopenblas` if OpenBLAS was
|
||||
compiled as a shared library.
|
||||
### Set the number of threads with environment variables.
|
||||
|
||||
### Setting the number of threads using environment variables
|
||||
Examples:
|
||||
|
||||
Environment variables are used to specify a maximum number of threads.
|
||||
For example,
|
||||
export OPENBLAS_NUM_THREADS=4
|
||||
|
||||
```sh
|
||||
export OPENBLAS_NUM_THREADS=4
|
||||
export GOTO_NUM_THREADS=4
|
||||
export OMP_NUM_THREADS=4
|
||||
```
|
||||
or
|
||||
|
||||
The priorities are `OPENBLAS_NUM_THREADS` > `GOTO_NUM_THREADS` > `OMP_NUM_THREADS`.
|
||||
export GOTO_NUM_THREADS=4
|
||||
|
||||
If you compile this library with `USE_OPENMP=1`, you should set the `OMP_NUM_THREADS`
|
||||
environment variable; OpenBLAS ignores `OPENBLAS_NUM_THREADS` and `GOTO_NUM_THREADS` when
|
||||
compiled with `USE_OPENMP=1`.
|
||||
or
|
||||
|
||||
### Setting the number of threads at runtime
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
We provide the following functions to control the number of threads at runtime:
|
||||
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
|
||||
```c
|
||||
void goto_set_num_threads(int num_threads);
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
```
|
||||
If you compile this lib with USE_OPENMP=1, you should set OMP_NUM_THREADS environment variable. OpenBLAS ignores OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS with USE_OPENMP=1.
|
||||
|
||||
If you compile this library with `USE_OPENMP=1`, you should use the above functions too.
|
||||
### Set the number of threads on runtime.
|
||||
|
||||
## Reporting bugs
|
||||
We provided the below functions to control the number of threads on runtime.
|
||||
|
||||
Please submit an issue in https://github.com/xianyi/OpenBLAS/issues.
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should use the above functions, too.
|
||||
|
||||
## Report Bugs
|
||||
Please add an issue in https://github.com/xianyi/OpenBLAS/issues
|
||||
|
||||
## Contact
|
||||
|
||||
* OpenBLAS users mailing list: https://groups.google.com/forum/#!forum/openblas-users
|
||||
* OpenBLAS developers mailing list: https://groups.google.com/forum/#!forum/openblas-dev
|
||||
|
||||
## Change log
|
||||
|
||||
Please see Changelog.txt to view the differences between OpenBLAS and GotoBLAS2 1.13 BSD version.
|
||||
## ChangeLog
|
||||
Please see Changelog.txt to obtain the differences between OpenBLAS and GotoBLAS2 1.13 BSD version.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
* Please read the [FAQ](https://github.com/xianyi/OpenBLAS/wiki/Faq) first.
|
||||
* Please use GCC version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD.
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture.
|
||||
Clang 3.0 will generate the wrong AVX binary code.
|
||||
* Please use GCC version 6 or LLVM version 6 and above to compile Skylake AVX512 kernels.
|
||||
* The number of CPUs/cores should be less than or equal to 256. On Linux `x86_64` (`amd64`),
|
||||
there is experimental support for up to 1024 CPUs/cores and 128 numa nodes if you build
|
||||
the library with `BIGNUMA=1`.
|
||||
* OpenBLAS does not set processor affinity by default.
|
||||
On Linux, you can enable processor affinity by commenting out the line `NO_AFFINITY=1` in
|
||||
Makefile.rule. However, note that this may cause
|
||||
[a conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
|
||||
* On Loongson 3A, `make test` may fail with a `pthread_create` error (`EAGAIN`).
|
||||
However, it will be okay when you run the same test case on the shell.
|
||||
* Please read [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) first.
|
||||
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
|
||||
* The number of CPUs/Cores should less than or equal to 256. On Linux x86_64(amd64), there is experimental support for up to 1024 CPUs/Cores and 128 numa nodes if you build the library with BIGNUMA=1.
|
||||
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting the line NO_AFFINITY=1 in Makefile.rule. But this may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
|
||||
* On Loongson 3A, make test may fail because of a pthread_create error. The error code is EAGAIN. However, it will be OK when you run the same test case in the shell.
|
||||
|
||||
## Contributing
|
||||
|
||||
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue
|
||||
to start a discussion around a feature idea or a bug.
|
||||
2. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
|
||||
3. Write a test which shows that the bug was fixed or that the feature works as expected.
|
||||
4. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.
|
||||
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue to start a discussion around a feature idea or a bug.
|
||||
1. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
|
||||
1. Write a test which shows that the bug was fixed or that the feature works as expected.
|
||||
1. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.
|
||||
|
||||
## Donation
|
||||
|
||||
Please read [this wiki page](https://github.com/xianyi/OpenBLAS/wiki/Donation).
|
||||
|
||||
@@ -20,7 +20,6 @@ DUNNINGTON
|
||||
NEHALEM
|
||||
SANDYBRIDGE
|
||||
HASWELL
|
||||
SKYLAKEX
|
||||
ATOM
|
||||
|
||||
b)AMD CPU:
|
||||
@@ -57,7 +56,6 @@ CELL
|
||||
|
||||
3.MIPS CPU:
|
||||
P5600
|
||||
1004K
|
||||
|
||||
4.MIPS64 CPU:
|
||||
SICORTEX
|
||||
@@ -65,7 +63,6 @@ LOONGSON3A
|
||||
LOONGSON3B
|
||||
I6400
|
||||
P6600
|
||||
I6500
|
||||
|
||||
5.IA64 CPU:
|
||||
ITANIUM2
|
||||
@@ -83,14 +80,8 @@ ARMV5
|
||||
|
||||
8.ARM 64-bit CPU:
|
||||
ARMV8
|
||||
CORTEXA53
|
||||
CORTEXA57
|
||||
CORTEXA72
|
||||
CORTEXA73
|
||||
FALKOR
|
||||
VULCAN
|
||||
THUNDERX
|
||||
THUNDERX2T99
|
||||
|
||||
9.System Z:
|
||||
ZARCH_GENERIC
|
||||
Z13
|
||||
|
||||
14
USAGE.md
14
USAGE.md
@@ -14,20 +14,6 @@ Please build OpenBLAS with larger `NUM_THREADS`. For example, `make
|
||||
NUM_THREADS=32` or `make NUM_THREADS=64`. In `Makefile.system`, we will set
|
||||
`MAX_CPU_NUMBER=NUM_THREADS`.
|
||||
|
||||
Despite its name, and due to the use of memory buffers in functions like SGEMM,
|
||||
the setting of NUM_THREADS can be relevant even for a single-threaded build
|
||||
of OpenBLAS, if such functions get called by multiple threads of a program
|
||||
that uses OpenBLAS. In some cases, the affected code may simply crash or throw
|
||||
a segmentation fault without displaying the above warning first.
|
||||
|
||||
Note that the number of threads used at runtime can be altered to differ from the
|
||||
value NUM_THREADS was set to at build time. At runtime, the actual number of
|
||||
threads can be set anywhere from 1 to the build's NUM_THREADS (note however,
|
||||
that this does not change the number of memory buffers that will be allocated,
|
||||
which is set at build time). The number of threads for a process can be set by
|
||||
using the mechanisms described below.
|
||||
|
||||
|
||||
#### How can I use OpenBLAS in multi-threaded applications?
|
||||
|
||||
If your application is already multi-threaded, it will conflict with OpenBLAS
|
||||
|
||||
52
appveyor.yml
52
appveyor.yml
@@ -5,8 +5,6 @@ version: 0.2.19.{build}
|
||||
platform:
|
||||
- x64
|
||||
|
||||
os: Visual Studio 2017
|
||||
|
||||
configuration: Release
|
||||
|
||||
clone_folder: c:\projects\OpenBLAS
|
||||
@@ -14,53 +12,33 @@ clone_folder: c:\projects\OpenBLAS
|
||||
init:
|
||||
- git config --global core.autocrlf input
|
||||
|
||||
build:
|
||||
project: OpenBLAS.sln
|
||||
|
||||
clone_depth: 5
|
||||
|
||||
#branches to build
|
||||
branches:
|
||||
only:
|
||||
- master
|
||||
- develop
|
||||
- cmake
|
||||
|
||||
skip_tags: true
|
||||
|
||||
matrix:
|
||||
fast_finish: false
|
||||
fast_finish: true
|
||||
|
||||
skip_commits:
|
||||
# Add [av skip] to commit messages
|
||||
message: /\[av skip\]/
|
||||
|
||||
environment:
|
||||
global:
|
||||
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
|
||||
matrix:
|
||||
- COMPILER: clang-cl
|
||||
WITH_FORTRAN: yes
|
||||
- COMPILER: clang-cl
|
||||
DYNAMIC_ARCH: ON
|
||||
WITH_FORTRAN: no
|
||||
- COMPILER: cl
|
||||
|
||||
install:
|
||||
- if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
|
||||
- if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
|
||||
- if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake
|
||||
|
||||
- if [%WITH_FORTRAN%]==[no] conda install --yes --quiet ninja
|
||||
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet -c isuruf kitware-ninja
|
||||
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flang
|
||||
|
||||
- if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
|
||||
- if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
|
||||
- if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
|
||||
|
||||
before_build:
|
||||
- ps: if (-Not (Test-Path .\build)) { mkdir build }
|
||||
- cd build
|
||||
- if [%COMPILER%]==[cl] cmake -G "Visual Studio 15 2017 Win64" ..
|
||||
- if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl ..
|
||||
- if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
|
||||
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON ..
|
||||
|
||||
build_script:
|
||||
- cmake --build .
|
||||
- echo Running cmake...
|
||||
- cd c:\projects\OpenBLAS
|
||||
- cmake -G "Visual Studio 12 Win64" .
|
||||
|
||||
test_script:
|
||||
- echo Running Test
|
||||
- cd utest
|
||||
- cd c:\projects\OpenBLAS\utest
|
||||
- openblas_utest
|
||||
|
||||
140
benchmark/gemm.c
140
benchmark/gemm.c
@@ -121,15 +121,13 @@ static void *huge_malloc(BLASLONG size){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 0.0};
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
char transa = 'N';
|
||||
char transb = 'N';
|
||||
blasint m, n, k, i, j, lda, ldb, ldc;
|
||||
char trans='N';
|
||||
blasint m, n, i, j;
|
||||
int loops = 1;
|
||||
int has_param_m = 0;
|
||||
int has_param_n = 0;
|
||||
int has_param_k = 0;
|
||||
int has_param_n=0;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
@@ -137,108 +135,86 @@ int main(int argc, char *argv[]){
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1, timeg;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++; }
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; }
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++; }
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) {
|
||||
transa=*p;
|
||||
transb=*p;
|
||||
}
|
||||
if ((p = getenv("OPENBLAS_TRANSA"))) {
|
||||
transa=*p;
|
||||
}
|
||||
if ((p = getenv("OPENBLAS_TRANSB"))) {
|
||||
transb=*p;
|
||||
}
|
||||
TOUPPER(transa);
|
||||
TOUPPER(transb);
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step=%d : Transa=%c : Transb=%c\n", from, to, step, transa, transb);
|
||||
fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c\n", from, to, step, trans);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if ( p != NULL ) {
|
||||
loops = atoi(p);
|
||||
}
|
||||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
if ((p = getenv("OPENBLAS_PARAM_M"))) {
|
||||
m = atoi(p);
|
||||
has_param_m=1;
|
||||
} else {
|
||||
m = to;
|
||||
}
|
||||
if ((p = getenv("OPENBLAS_PARAM_N"))) {
|
||||
n = atoi(p);
|
||||
has_param_n=1;
|
||||
} else {
|
||||
n = to;
|
||||
}
|
||||
if ((p = getenv("OPENBLAS_PARAM_K"))) {
|
||||
k = atoi(p);
|
||||
has_param_k=1;
|
||||
} else {
|
||||
k = to;
|
||||
}
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * m * k * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * k * n * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
n = atoi(p);
|
||||
has_param_n=1;
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
for (i = 0; i < m * k * COMPSIZE; i++) {
|
||||
a[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
for (i = 0; i < k * n * COMPSIZE; i++) {
|
||||
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
for (i = 0; i < m * n * COMPSIZE; i++) {
|
||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
for(j = 0; j < to; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
for (i = from; i <= to; i += step) {
|
||||
|
||||
timeg=0;
|
||||
|
||||
if (!has_param_m) { m = i; }
|
||||
if (!has_param_n) { n = i; }
|
||||
if (!has_param_k) { k = i; }
|
||||
if ( has_param_n == 1 && n <= m )
|
||||
n=n;
|
||||
else
|
||||
n=m;
|
||||
|
||||
if (transa == 'N') { lda = m; }
|
||||
else { lda = k; }
|
||||
if (transb == 'N') { ldb = k; }
|
||||
else { ldb = n; }
|
||||
ldc = m;
|
||||
|
||||
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
for (j=0; j<loops; j++) {
|
||||
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
|
||||
}
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
|
||||
|
||||
}
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg = time1/loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)k * (double)m * (double)n / timeg * 1.e-6, time1);
|
||||
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -122,7 +122,7 @@ int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 0.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char trans='N';
|
||||
blasint m, i, j;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
|
||||
22
c_check
22
c_check
@@ -54,8 +54,6 @@ $compiler = GCC if ($compiler eq "");
|
||||
$os = Linux if ($data =~ /OS_LINUX/);
|
||||
$os = FreeBSD if ($data =~ /OS_FREEBSD/);
|
||||
$os = NetBSD if ($data =~ /OS_NETBSD/);
|
||||
$os = OpenBSD if ($data =~ /OS_OPENBSD/);
|
||||
$os = DragonFly if ($data =~ /OS_DRAGONFLY/);
|
||||
$os = Darwin if ($data =~ /OS_DARWIN/);
|
||||
$os = SunOS if ($data =~ /OS_SUNOS/);
|
||||
$os = AIX if ($data =~ /OS_AIX/);
|
||||
@@ -64,7 +62,6 @@ $os = WINNT if ($data =~ /OS_WINNT/);
|
||||
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/);
|
||||
$os = Interix if ($data =~ /OS_INTERIX/);
|
||||
$os = Android if ($data =~ /OS_ANDROID/);
|
||||
$os = Haiku if ($data =~ /OS_HAIKU/);
|
||||
|
||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
||||
@@ -202,21 +199,6 @@ $architecture = zarch if ($data =~ /ARCH_ZARCH/);
|
||||
$binformat = bin32;
|
||||
$binformat = bin64 if ($data =~ /BINARY_64/);
|
||||
|
||||
$no_avx512= 0;
|
||||
if (($architecture eq "x86") || ($architecture eq "x86_64")) {
|
||||
$code = '"vbroadcastss -4 * 4(%rsi), %zmm2"';
|
||||
print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n";
|
||||
$args = " -march=skylake-avx512 -o $tmpf.o -x c $tmpf";
|
||||
my @cmd = ("$compiler_name $args >/dev/null 2>/dev/null");
|
||||
system(@cmd) == 0;
|
||||
if ($? != 0) {
|
||||
$no_avx512 = 1;
|
||||
} else {
|
||||
$no_avx512 = 0;
|
||||
}
|
||||
unlink("tmpf.o");
|
||||
}
|
||||
|
||||
$data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
|
||||
|
||||
$data =~ /globl\s([_\.]*)(.*)/;
|
||||
@@ -224,6 +206,7 @@ $data =~ /globl\s([_\.]*)(.*)/;
|
||||
$need_fu = $1;
|
||||
|
||||
$cross = 0;
|
||||
$cross = 1 if ($os ne $hostos);
|
||||
|
||||
if ($architecture ne $hostarch) {
|
||||
$cross = 1;
|
||||
@@ -231,8 +214,6 @@ if ($architecture ne $hostarch) {
|
||||
$cross = 0 if (($hostarch eq "mips64") && ($architecture eq "mips"));
|
||||
}
|
||||
|
||||
$cross = 1 if ($os ne $hostos);
|
||||
|
||||
$openmp = "" if $ENV{USE_OPENMP} != 1;
|
||||
|
||||
$linker_L = "";
|
||||
@@ -305,7 +286,6 @@ print MAKEFILE "CROSS=1\n" if $cross != 0;
|
||||
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
|
||||
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
|
||||
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
|
||||
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1;
|
||||
|
||||
$os =~ tr/[a-z]/[A-Z]/;
|
||||
$architecture =~ tr/[a-z]/[A-Z]/;
|
||||
|
||||
176
cblas.h
176
cblas.h
@@ -51,57 +51,51 @@ typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=1
|
||||
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||
typedef CBLAS_ORDER CBLAS_LAYOUT;
|
||||
|
||||
|
||||
float cblas_sdsdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
double cblas_dsdot (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
float cblas_sdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
double cblas_ddot(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
openblas_complex_float cblas_cdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_float cblas_cdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
|
||||
void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
|
||||
void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
|
||||
void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
|
||||
void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret);
|
||||
void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret);
|
||||
void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret);
|
||||
void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret);
|
||||
|
||||
float cblas_sasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
float cblas_snrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dnrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
|
||||
float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dznrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX);
|
||||
float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
|
||||
double cblas_dznrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
CBLAS_INDEX cblas_isamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
CBLAS_INDEX cblas_isamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_idamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||
CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||
|
||||
void cblas_saxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_sswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_srot(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s);
|
||||
void cblas_drot(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s);
|
||||
@@ -117,59 +111,59 @@ void cblas_drotmg(double *d1, double *d2, double *b1, OPENBLAS_CONST double b2,
|
||||
|
||||
void cblas_sscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_cscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_csscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zdscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_cscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_csscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_zdscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_sgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_dgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_cgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_sger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_strsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_strmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_ssyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dsyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_ssyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo,OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X,
|
||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_dsyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,
|
||||
OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_cher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
|
||||
void cblas_zher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX,
|
||||
OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
|
||||
|
||||
void cblas_sgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_cgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_ssbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
@@ -182,45 +176,45 @@ void cblas_stbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLA
|
||||
void cblas_dtbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_stpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_dtpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ctpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
|
||||
void cblas_ztpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
|
||||
|
||||
void cblas_ssymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_dsymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_chemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_chemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A,
|
||||
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
|
||||
void cblas_sspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *Ap,
|
||||
@@ -231,36 +225,36 @@ void cblas_dspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLA
|
||||
void cblas_sspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *Ap);
|
||||
void cblas_dspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *Ap);
|
||||
|
||||
void cblas_chpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A);
|
||||
void cblas_zhpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST void *X,OPENBLAS_CONST blasint incX, void *A);
|
||||
void cblas_chpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A);
|
||||
void cblas_zhpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,OPENBLAS_CONST blasint incX, double *A);
|
||||
|
||||
void cblas_sspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A);
|
||||
void cblas_dspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A);
|
||||
void cblas_chpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *Ap);
|
||||
void cblas_zhpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *Ap);
|
||||
void cblas_chpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *Ap);
|
||||
void cblas_zhpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *Ap);
|
||||
|
||||
void cblas_chbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_chpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *Ap, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *Ap, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
|
||||
void cblas_zhpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *Ap, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *Ap, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
|
||||
|
||||
void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_cgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_cgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
|
||||
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
@@ -268,60 +262,60 @@ void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA
|
||||
void cblas_dsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_csyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_strmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_dtrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ctrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ztrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
|
||||
void cblas_strsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_dtrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ctrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
|
||||
void cblas_ztrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
|
||||
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
|
||||
|
||||
void cblas_chemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zhemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_cherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_cher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_xerbla(blasint p, char *rout, char *form, ...);
|
||||
|
||||
@@ -331,9 +325,9 @@ void cblas_saxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS
|
||||
|
||||
void cblas_daxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
void cblas_somatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, OPENBLAS_CONST float *a,
|
||||
OPENBLAS_CONST blasint clda, float *b, OPENBLAS_CONST blasint cldb);
|
||||
|
||||
@@ -1,79 +0,0 @@
|
||||
# OpenBLASConfig.cmake
|
||||
# --------------------
|
||||
#
|
||||
# OpenBLAS cmake module.
|
||||
# This module sets the following variables in your project::
|
||||
#
|
||||
# OpenBLAS_FOUND - true if OpenBLAS and all required components found on the system
|
||||
# OpenBLAS_VERSION - OpenBLAS version in format Major.Minor.Release
|
||||
# OpenBLAS_INCLUDE_DIRS - Directory where OpenBLAS header is located.
|
||||
# OpenBLAS_INCLUDE_DIR - same as DIRS
|
||||
# OpenBLAS_LIBRARIES - OpenBLAS library to link against.
|
||||
# OpenBLAS_LIBRARY - same as LIBRARIES
|
||||
#
|
||||
#
|
||||
# Available components::
|
||||
#
|
||||
## shared - search for only shared library
|
||||
## static - search for only static library
|
||||
# serial - search for unthreaded library
|
||||
# pthread - search for native pthread threaded library
|
||||
# openmp - search for OpenMP threaded library
|
||||
#
|
||||
#
|
||||
# Exported targets::
|
||||
#
|
||||
# If OpenBLAS is found, this module defines the following :prop_tgt:`IMPORTED`
|
||||
## target. Target is shared _or_ static, so, for both, use separate, not
|
||||
## overlapping, installations. ::
|
||||
#
|
||||
# OpenBLAS::OpenBLAS - the main OpenBLAS library #with header & defs attached.
|
||||
#
|
||||
#
|
||||
# Suggested usage::
|
||||
#
|
||||
# find_package(OpenBLAS)
|
||||
# find_package(OpenBLAS 0.2.20 EXACT CONFIG REQUIRED COMPONENTS pthread)
|
||||
#
|
||||
#
|
||||
# The following variables can be set to guide the search for this package::
|
||||
#
|
||||
# OpenBLAS_DIR - CMake variable, set to directory containing this Config file
|
||||
# CMAKE_PREFIX_PATH - CMake variable, set to root directory of this package
|
||||
# PATH - environment variable, set to bin directory of this package
|
||||
# CMAKE_DISABLE_FIND_PACKAGE_OpenBLAS - CMake variable, disables
|
||||
# find_package(OpenBLAS) when not REQUIRED, perhaps to force internal build
|
||||
|
||||
@PACKAGE_INIT@
|
||||
|
||||
set(PN OpenBLAS)
|
||||
|
||||
# need to check that the @USE_*@ evaluate to something cmake can perform boolean logic upon
|
||||
if(@USE_OPENMP@)
|
||||
set(${PN}_openmp_FOUND 1)
|
||||
elseif(@USE_THREAD@)
|
||||
set(${PN}_pthread_FOUND 1)
|
||||
else()
|
||||
set(${PN}_serial_FOUND 1)
|
||||
endif()
|
||||
|
||||
check_required_components(${PN})
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Don't include targets if this file is being picked up by another
|
||||
# project which has already built this as a subproject
|
||||
#-----------------------------------------------------------------------------
|
||||
if(NOT TARGET ${PN}::OpenBLAS)
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/${PN}Targets.cmake")
|
||||
|
||||
get_property(_loc TARGET ${PN}::OpenBLAS PROPERTY LOCATION)
|
||||
set(${PN}_LIBRARY ${_loc})
|
||||
get_property(_ill TARGET ${PN}::OpenBLAS PROPERTY INTERFACE_LINK_LIBRARIES)
|
||||
set(${PN}_LIBRARIES ${_ill})
|
||||
|
||||
get_property(_id TARGET ${PN}::OpenBLAS PROPERTY INCLUDE_DIRECTORIES)
|
||||
set(${PN}_INCLUDE_DIR ${_id})
|
||||
get_property(_iid TARGET ${PN}::OpenBLAS PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
|
||||
set(${PN}_INCLUDE_DIRS ${_iid})
|
||||
endif()
|
||||
|
||||
@@ -3,9 +3,9 @@
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Sets various variables based on architecture.
|
||||
|
||||
if (X86 OR X86_64)
|
||||
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64")
|
||||
|
||||
if (X86)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
if (NOT BINARY)
|
||||
set(NO_BINARY_MODE 1)
|
||||
endif ()
|
||||
@@ -33,46 +33,47 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
|
||||
endif ()
|
||||
|
||||
if (USE_OPENMP)
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
# NO_AFFINITY = 1
|
||||
find_package(OpenMP REQUIRED)
|
||||
if (OpenMP_FOUND)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} ${OpenMP_C_FLAGS} -DUSE_OPENMP")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} ${OpenMP_Fortran_FLAGS}")
|
||||
endif()
|
||||
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||
message(WARNING "Clang doesn't support OpenMP yet.")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||
set(CEXTRALIB "${CEXTRALIB} -lstdc++")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
if (ARM64)
|
||||
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99)
|
||||
endif ()
|
||||
|
||||
if (X86)
|
||||
set(DYNAMIC_CORE KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
|
||||
endif ()
|
||||
|
||||
if (X86_64)
|
||||
set(DYNAMIC_CORE PRESCOTT CORE2)
|
||||
if (DYNAMIC_OLDER)
|
||||
set (DYNAMIC_CORE ${DYNAMIC_CORE} PENRYN DUNNINGTON)
|
||||
endif ()
|
||||
set (DYNAMIC_CORE ${DYNAMIC_CORE} NEHALEM)
|
||||
if (DYNAMIC_OLDER)
|
||||
set (DYNAMIC_CORE ${DYNAMIC_CORE} OPTERON OPTERON_SSE3)
|
||||
endif ()
|
||||
set (DYNAMIC_CORE ${DYNAMIC_CORE} BARCELONA)
|
||||
if (DYNAMIC_OLDER)
|
||||
set (DYNAMIC_CORE ${DYNAMIC_CORE} BOBCAT ATOM NANO)
|
||||
endif ()
|
||||
if (${ARCH} STREQUAL "x86_64")
|
||||
set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
|
||||
if (NOT NO_AVX)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR)
|
||||
set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
|
||||
endif ()
|
||||
if (NOT NO_AVX2)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN)
|
||||
endif ()
|
||||
if (NOT NO_AVX512)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX)
|
||||
set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL ZEN")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
@@ -93,7 +94,7 @@ if (${ARCH} STREQUAL "ia64")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (MIPS64)
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
set(NO_BINARY_MODE 1)
|
||||
endif ()
|
||||
|
||||
@@ -102,12 +103,12 @@ if (${ARCH} STREQUAL "alpha")
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
if (ARM)
|
||||
if (${ARCH} STREQUAL "arm")
|
||||
set(NO_BINARY_MODE 1)
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
if (ARM64)
|
||||
if (${ARCH} STREQUAL "arm64")
|
||||
set(NO_BINARY_MODE 1)
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
98
cmake/c_check.cmake
Normal file
98
cmake/c_check.cmake
Normal file
@@ -0,0 +1,98 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from the OpenBLAS/c_check perl script.
|
||||
## This is triggered by prebuild.cmake and runs before any of the code is built.
|
||||
## Creates config.h and Makefile.conf.
|
||||
|
||||
# CMake vars set by this file:
|
||||
# OSNAME (use CMAKE_SYSTEM_NAME)
|
||||
# ARCH
|
||||
# C_COMPILER (use CMAKE_C_COMPILER)
|
||||
# BINARY32
|
||||
# BINARY64
|
||||
# FU
|
||||
# CROSS_SUFFIX
|
||||
# CROSS
|
||||
# CEXTRALIB
|
||||
|
||||
# Defines set by this file:
|
||||
# OS_
|
||||
# ARCH_
|
||||
# C_
|
||||
# __32BIT__
|
||||
# __64BIT__
|
||||
# FUNDERSCORE
|
||||
# PTHREAD_CREATE_FUNC
|
||||
|
||||
# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables.
|
||||
set(FU "")
|
||||
if(APPLE)
|
||||
set(FU "_")
|
||||
elseif(MSVC)
|
||||
set(FU "_")
|
||||
elseif(UNIX)
|
||||
set(FU "")
|
||||
endif()
|
||||
|
||||
# Convert CMake vars into the format that OpenBLAS expects
|
||||
string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS)
|
||||
if (${HOST_OS} STREQUAL "WINDOWS")
|
||||
set(HOST_OS WINNT)
|
||||
endif ()
|
||||
|
||||
# added by hpa - check size of void ptr to detect 64-bit compile
|
||||
if (NOT DEFINED BINARY)
|
||||
set(BINARY 32)
|
||||
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
set(BINARY 64)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (BINARY EQUAL 64)
|
||||
set(BINARY64 1)
|
||||
else ()
|
||||
set(BINARY32 1)
|
||||
endif ()
|
||||
|
||||
# CMake docs define these:
|
||||
# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for.
|
||||
# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on.
|
||||
#
|
||||
# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check
|
||||
set(ARCH ${CMAKE_SYSTEM_PROCESSOR})
|
||||
if (${ARCH} STREQUAL "AMD64")
|
||||
set(ARCH "x86_64")
|
||||
endif ()
|
||||
|
||||
# If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong
|
||||
if (${ARCH} STREQUAL "x86_64" AND BINARY EQUAL 32)
|
||||
set(ARCH x86)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} STREQUAL "X86")
|
||||
set(ARCH x86)
|
||||
endif ()
|
||||
|
||||
if (${ARCH} MATCHES "ppc")
|
||||
set(ARCH power)
|
||||
endif ()
|
||||
|
||||
set(COMPILER_ID ${CMAKE_CXX_COMPILER_ID})
|
||||
if (${COMPILER_ID} STREQUAL "GNU")
|
||||
set(COMPILER_ID "GCC")
|
||||
endif ()
|
||||
|
||||
string(TOUPPER ${ARCH} UC_ARCH)
|
||||
|
||||
file(WRITE ${TARGET_CONF}
|
||||
"#define OS_${HOST_OS}\t1\n"
|
||||
"#define ARCH_${UC_ARCH}\t1\n"
|
||||
"#define C_${COMPILER_ID}\t1\n"
|
||||
"#define __${BINARY}BIT__\t1\n"
|
||||
"#define FUNDERSCORE\t${FU}\n")
|
||||
|
||||
if (${HOST_OS} STREQUAL "WINDOWSSTORE")
|
||||
file(APPEND ${TARGET_CONF}
|
||||
"#define OS_WINNT\t1\n")
|
||||
endif ()
|
||||
|
||||
@@ -15,7 +15,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR
|
||||
|
||||
if (NO_BINARY_MODE)
|
||||
|
||||
if (MIPS64)
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
if (BINARY64)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64")
|
||||
else ()
|
||||
@@ -24,12 +24,17 @@ if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL "LOONGSON3A" OR ${CORE} STREQUAL "LOONGSON3B")
|
||||
if (${CORE} STREQUAL "LOONGSON3A")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
|
||||
endif ()
|
||||
|
||||
if (CMAKE_SYSTEM_NAME STREQUAL "AIX")
|
||||
if (${CORE} STREQUAL "LOONGSON3B")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
|
||||
endif ()
|
||||
|
||||
if (${OSNAME} STREQUAL "AIX")
|
||||
set(BINARY_DEFINED 1)
|
||||
endif ()
|
||||
endif ()
|
||||
@@ -61,7 +66,7 @@ endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "OPEN64")
|
||||
|
||||
if (MIPS64)
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
|
||||
if (NOT BINARY64)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -n32")
|
||||
@@ -89,10 +94,10 @@ endif ()
|
||||
|
||||
if (${CMAKE_C_COMPILER} STREQUAL "SUN")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -w")
|
||||
if (X86)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
|
||||
else ()
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
||||
@@ -51,8 +51,7 @@ else()
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${PROJECT_BINARY_DIR}/openblas.def
|
||||
#TARGET ${OpenBLAS_LIBNAME} PRE_LINK
|
||||
TARGET ${OpenBLAS_LIBNAME} PRE_LINK
|
||||
COMMAND perl
|
||||
ARGS "${PROJECT_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
|
||||
COMMENT "Create openblas.def file"
|
||||
|
||||
@@ -20,6 +20,12 @@
|
||||
# NEEDBUNDERSCORE
|
||||
# NEED2UNDERSCORES
|
||||
|
||||
if (MSVC)
|
||||
# had to do this for MSVC, else CMake automatically assumes I have ifort... -hpa
|
||||
include(CMakeForceCompiler)
|
||||
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
|
||||
endif ()
|
||||
|
||||
if (NOT NO_LAPACK)
|
||||
enable_language(Fortran)
|
||||
else()
|
||||
@@ -28,13 +34,17 @@ else()
|
||||
endif()
|
||||
|
||||
if (NOT ONLY_CBLAS)
|
||||
# N.B. f_check is not cross-platform, so instead try to use CMake variables
|
||||
# run f_check (appends to TARGET files)
|
||||
# message(STATUS "Running f_check...")
|
||||
# execute_process(COMMAND perl f_check ${TARGET_MAKE} ${TARGET_CONF} ${CMAKE_Fortran_COMPILER}
|
||||
# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
|
||||
# TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile
|
||||
# TODO: set FEXTRALIB flags a la f_check?
|
||||
|
||||
set(BU "_")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
file(APPEND ${TARGET_CONF}
|
||||
"#define BUNDERSCORE _\n"
|
||||
"#define NEEDBUNDERSCORE 1\n"
|
||||
"#define NEED2UNDERSCORES 0\n")
|
||||
@@ -46,7 +56,7 @@ else ()
|
||||
set(NO_FBLAS 1)
|
||||
#set(F_COMPILER GFORTRAN) # CMake handles the fortran compiler
|
||||
set(BU "_")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
file(APPEND ${TARGET_CONF}
|
||||
"#define BUNDERSCORE _\n"
|
||||
"#define NEEDBUNDERSCORE 1\n")
|
||||
endif()
|
||||
|
||||
@@ -3,15 +3,15 @@
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Sets Fortran related variables.
|
||||
|
||||
if (INTERFACE64)
|
||||
set(SUFFIX64 64)
|
||||
set(SUFFIX64_UNDERSCORE _64)
|
||||
endif()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "FLANG")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FLANG")
|
||||
if (BINARY64 AND INTERFACE64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
|
||||
if (BINARY64)
|
||||
if (INTERFACE64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
|
||||
endif ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
|
||||
endif ()
|
||||
if (USE_OPENMP)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
|
||||
@@ -44,13 +44,13 @@ endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "GFORTRAN")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -frecursive")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
|
||||
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
|
||||
if (NOT NO_LAPACK)
|
||||
set(EXTRALIB "{EXTRALIB} -lgfortran")
|
||||
endif ()
|
||||
if (NO_BINARY_MODE)
|
||||
if (MIPS64)
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
if (BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
|
||||
else ()
|
||||
@@ -130,7 +130,7 @@ if (${F_COMPILER} STREQUAL "PATHSCALE")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT MIPS64)
|
||||
if (NOT ${ARCH} STREQUAL "mips64")
|
||||
if (NOT BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
|
||||
else ()
|
||||
@@ -158,7 +158,7 @@ if (${F_COMPILER} STREQUAL "OPEN64")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (MIPS64)
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
|
||||
if (NOT BINARY64)
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -n32")
|
||||
@@ -189,7 +189,7 @@ endif ()
|
||||
|
||||
if (${F_COMPILER} STREQUAL "SUN")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN")
|
||||
if (X86)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
|
||||
|
||||
@@ -1,485 +1,387 @@
|
||||
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.
|
||||
|
||||
set(ALLAUX ilaenv.f ilaenv2stage.f ieeeck.f lsamen.f iparmq.f iparam2stage.F
|
||||
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f
|
||||
../INSTALL/ilaver.f xerbla_array.f
|
||||
../INSTALL/slamch.f)
|
||||
set(ALLAUX
|
||||
ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f
|
||||
ilaprec.f ilatrans.f ilauplo.f iladiag.f iparam2stage.F chla_transtype.f
|
||||
../INSTALL/ilaver.f ../INSTALL/slamch.f
|
||||
)
|
||||
|
||||
set(SCLAUX
|
||||
sbdsdc.f
|
||||
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
|
||||
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
|
||||
slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
|
||||
slagts.f slamrg.f slanst.f
|
||||
slapy2.f slapy3.f slarnv.f
|
||||
slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
|
||||
slarrk.f slarrr.f slaneg.f
|
||||
slartg.f slaruv.f slas2.f slascl.f
|
||||
slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
|
||||
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
|
||||
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
|
||||
slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
|
||||
ssteqr.f ssterf.f slaisnan.f sisnan.f
|
||||
slartgp.f slartgs.f
|
||||
../INSTALL/second_${TIMER}.f)
|
||||
sbdsdc.f
|
||||
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
|
||||
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
|
||||
slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
|
||||
slagts.f slamrg.f slanst.f
|
||||
slapy2.f slapy3.f slarnv.f
|
||||
slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
|
||||
slarrk.f slarrr.f slaneg.f
|
||||
slartg.f slaruv.f slas2.f slascl.f
|
||||
slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
|
||||
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
|
||||
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
|
||||
slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
|
||||
ssteqr.f ssterf.f slaisnan.f sisnan.f
|
||||
slartgp.f slartgs.f
|
||||
../INSTALL/second_${TIMER}.f
|
||||
)
|
||||
|
||||
set(DZLAUX
|
||||
dbdsdc.f
|
||||
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
|
||||
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
|
||||
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
|
||||
dlagts.f dlamrg.f dlanst.f
|
||||
dlapy2.f dlapy3.f dlarnv.f
|
||||
dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
|
||||
dlarrk.f dlarrr.f dlaneg.f
|
||||
dlartg.f dlaruv.f dlas2.f dlascl.f
|
||||
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
|
||||
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
|
||||
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
|
||||
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
|
||||
dsteqr.f dsterf.f dlaisnan.f disnan.f
|
||||
dlartgp.f dlartgs.f
|
||||
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f)
|
||||
dbdsdc.f dbdsvdx.f
|
||||
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
|
||||
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
|
||||
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
|
||||
dlagts.f dlamrg.f dlanst.f
|
||||
dlapy2.f dlapy3.f dlarnv.f
|
||||
dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
|
||||
dlarrk.f dlarrr.f dlaneg.f
|
||||
dlartg.f dlaruv.f dlas2.f dlascl.f
|
||||
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
|
||||
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
|
||||
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
|
||||
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
|
||||
dsteqr.f dsterf.f dlaisnan.f disnan.f
|
||||
dlartgp.f dlartgs.f
|
||||
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
|
||||
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f
|
||||
dgetsls.f dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f
|
||||
dtplqt2.f dtpmlqt.f dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f
|
||||
dsytf2_rk.f dlasyf_rk.f dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f
|
||||
dsytri_3x.f dsysv_rk.f dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f
|
||||
dsbevx_2stage.f dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f
|
||||
dsyevx_2stage.f dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F
|
||||
dsytrd_sy2sb.f dlarfy.f
|
||||
)
|
||||
|
||||
set(SLASRC
|
||||
sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
|
||||
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
|
||||
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
|
||||
sgehd2.f sgehrd.f sgelq2.f sgelqf.f
|
||||
sgels.f sgelsd.f sgelss.f sgelsy.f sgeql2.f sgeqlf.f
|
||||
sgeqp3.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f sgerq2.f sgerqf.f
|
||||
sgesc2.f sgesdd.f sgesvd.f sgesvdx.f sgesvx.f sgetc2.f
|
||||
sgetrf2.f sgetri.f
|
||||
sggbak.f sggbal.f
|
||||
sgges.f sgges3.f sggesx.f sggev.f sggev3.f sggevx.f
|
||||
sggglm.f sgghrd.f sgghd3.f sgglse.f sggqrf.f
|
||||
sggrqf.f sggsvd3.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f
|
||||
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
|
||||
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
|
||||
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
|
||||
slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
|
||||
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
|
||||
slansy.f slantb.f slantp.f slantr.f slanv2.f
|
||||
slapll.f slapmt.f
|
||||
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
|
||||
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
|
||||
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
|
||||
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
|
||||
slarrv.f slartv.f
|
||||
slarz.f slarzb.f slarzt.f slasy2.f
|
||||
slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f
|
||||
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f
|
||||
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
|
||||
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
|
||||
sorgrq.f sorgtr.f sorm2l.f sorm2r.f sorm22.f
|
||||
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
|
||||
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
|
||||
spbstf.f spbsv.f spbsvx.f
|
||||
spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
|
||||
sposvx.f spotrf2.f spotri.f spstrf.f spstf2.f
|
||||
sppcon.f sppequ.f
|
||||
spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
|
||||
spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
|
||||
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
|
||||
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
|
||||
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
|
||||
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
|
||||
sstevx.f ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
|
||||
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
|
||||
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
|
||||
ssyswapr.f ssytrs.f ssytrs2.f
|
||||
ssyconv.f ssyconvf.f ssyconvf_rook.f
|
||||
ssysv_aa.f ssysv_aa_2stage.f ssytrf_aa.f ssytrf_aa_2stage.f ssytrs_aa.f ssytrs_aa_2stage.f
|
||||
ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
|
||||
ssytri_rook.f ssycon_rook.f ssysv_rook.f
|
||||
ssytf2_rk.f ssytrf_rk.f ssytrs_3.f
|
||||
ssytri_3.f ssytri_3x.f ssycon_3.f ssysv_rk.f
|
||||
ssysv_aa.f ssytrf_aa.f ssytrs_aa.f
|
||||
stbcon.f
|
||||
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
|
||||
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
|
||||
stptrs.f
|
||||
strcon.f strevc.f strevc3.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
|
||||
strtrs.f stzrzf.f sstemr.f
|
||||
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
|
||||
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
|
||||
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
|
||||
sgeequb.f ssyequb.f spoequb.f sgbequb.f
|
||||
sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
|
||||
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
|
||||
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
|
||||
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f
|
||||
sgelqt.f sgelqt3.f sgemlqt.f
|
||||
sgetsls.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f
|
||||
sgelq.f slaswlq.f slamswlq.f sgemlq.f
|
||||
stplqt.f stplqt2.f stpmlqt.f
|
||||
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
|
||||
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
|
||||
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f)
|
||||
sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
|
||||
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
|
||||
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
|
||||
DEPRECATED/sgegs.f DEPRECATED/sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
|
||||
sgels.f sgelsd.f sgelss.f DEPRECATED/sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
|
||||
sgeqp3.f DEPRECATED/sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
|
||||
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvdx.f sgesvx.f
|
||||
sgetc2.f sgetri.f sgetrf2.f
|
||||
sggbak.f sggbal.f sgghd3.f sgges.f sgges3.f sggesx.f sggev.f sggev3.f sggevx.f
|
||||
sggglm.f sgghrd.f sgglse.f sggqrf.f
|
||||
sggrqf.f DEPRECATED/sggsvd.f sggsvd3.f DEPRECATED/sggsvp.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f
|
||||
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
|
||||
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
|
||||
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
|
||||
DEPRECATED/slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
|
||||
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
|
||||
slansy.f slantb.f slantp.f slantr.f slanv2.f
|
||||
slapll.f slapmt.f
|
||||
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
|
||||
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
|
||||
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
|
||||
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
|
||||
slarrv.f slartv.f
|
||||
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
|
||||
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f DEPRECATED/slatzm.f
|
||||
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
|
||||
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
|
||||
sorgrq.f sorgtr.f sorm2l.f sorm2r.f sorm22.f
|
||||
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
|
||||
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
|
||||
spbstf.f spbsv.f spbsvx.f
|
||||
spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
|
||||
sposvx.f spstrf.f spstf2.f
|
||||
sppcon.f sppequ.f
|
||||
spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
|
||||
spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
|
||||
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
|
||||
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
|
||||
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
|
||||
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
|
||||
sstevx.f
|
||||
ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
|
||||
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
|
||||
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
|
||||
ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f
|
||||
ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
|
||||
ssytri_rook.f ssycon_rook.f ssysv_rook.f
|
||||
stbcon.f
|
||||
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
|
||||
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
|
||||
stptrs.f
|
||||
strcon.f strevc.f strevc3.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
|
||||
strtrs.f DEPRECATED/stzrqf.f stzrzf.f sstemr.f
|
||||
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
|
||||
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
|
||||
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
|
||||
sgeequb.f ssyequb.f spoequb.f sgbequb.f
|
||||
sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
|
||||
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
|
||||
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
|
||||
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
|
||||
sgelq.f sgelqt.f sgelqt3.f sgemlq.f sgemlqt.f sgemqr.f sgeqr.f sgetsls.f
|
||||
slamswlq.f slamtsqr.f slaswlq.f slatsqr.f stplqt.f stplqt2.f stpmlqt.f
|
||||
ssysv_aa.f ssytrf_aa.f ssytrs_aa.f slasyf_aa.f ssytf2_rk.f slasyf_rk.f
|
||||
ssytrf_rk.f ssytrs_3.f ssycon_3.f ssytri_3.f ssytri_3x.f ssysv_rk.f
|
||||
ssb2st_kernels.f ssbev_2stage.f ssbevd_2stage.f ssbevx_2stage.f
|
||||
ssyev_2stage.f ssyevd_2stage.f ssyevr_2stage.f ssyevx_2stage.f
|
||||
ssygv_2stage.f ssytrd_2stage.f ssytrd_sb2st.F ssytrd_sy2sb.f slarfy.f
|
||||
)
|
||||
|
||||
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
|
||||
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
|
||||
sla_syrfsx_extended.f sla_syamv.f sla_syrcond.f sla_syrpvgrw.f
|
||||
sposvxx.f sporfsx.f sla_porfsx_extended.f sla_porcond.f
|
||||
sla_porpvgrw.f sgbsvxx.f sgbrfsx.f sla_gbrfsx_extended.f
|
||||
sla_gbamv.f sla_gbrcond.f sla_gbrpvgrw.f sla_lin_berr.f slarscl2.f
|
||||
slascl2.f sla_wwaddw.f)
|
||||
set(DSLASRC spotrs.f spotrf2.f)
|
||||
|
||||
set(CLASRC
|
||||
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
|
||||
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
|
||||
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
|
||||
cgehd2.f cgehrd.f cgelq2.f cgelqf.f
|
||||
cgels.f cgelsd.f cgelss.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
|
||||
cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f cgerq2.f cgerqf.f
|
||||
cgesc2.f cgesdd.f cgesvd.f cgesvdx.f
|
||||
cgesvj.f cgejsv.f cgsvj0.f cgsvj1.f
|
||||
cgesvx.f cgetc2.f cgetrf2.f
|
||||
cgetri.f
|
||||
cggbak.f cggbal.f
|
||||
cgges.f cgges3.f cggesx.f cggev.f cggev3.f cggevx.f
|
||||
cggglm.f cgghrd.f cgghd3.f cgglse.f cggqrf.f cggrqf.f
|
||||
cggsvd3.f cggsvp3.f
|
||||
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
|
||||
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
|
||||
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
|
||||
chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
|
||||
chetf2.f chetrd.f
|
||||
chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
|
||||
chetrs.f chetrs2.f
|
||||
chetf2_rook.f chetrf_rook.f chetri_rook.f
|
||||
chetrs_rook.f checon_rook.f chesv_rook.f
|
||||
chetf2_rk.f chetrf_rk.f chetri_3.f chetri_3x.f
|
||||
chetrs_3.f checon_3.f chesv_rk.f
|
||||
chesv_aa.f chesv_aa_2stage.f chetrf_aa.f chetrf_aa_2stage.f chetrs_aa.f chetrs_aa_2stage.f
|
||||
chgeqz.f chpcon.f chpev.f chpevd.f
|
||||
chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
|
||||
chpsvx.f
|
||||
chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
|
||||
clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
|
||||
claed0.f claed7.f claed8.f
|
||||
claein.f claesy.f claev2.f clags2.f clagtm.f
|
||||
clahef.f clahef_rook.f clahef_rk.f clahef_aa.f clahqr.f
|
||||
clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
|
||||
clanhb.f clanhe.f
|
||||
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
|
||||
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
|
||||
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
|
||||
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
|
||||
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
|
||||
clarf.f clarfb.f clarfg.f clarfgp.f clarft.f
|
||||
clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
|
||||
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
|
||||
clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f
|
||||
clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
|
||||
cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
|
||||
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
|
||||
cposv.f cposvx.f cpotrf2.f cpotri.f cpstrf.f cpstf2.f
|
||||
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
|
||||
cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
|
||||
crot.f cspcon.f csprfs.f cspsv.f
|
||||
cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
|
||||
cstegr.f cstein.f csteqr.f csycon.f
|
||||
csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f
|
||||
csytri2.f csytri2x.f csyswapr.f
|
||||
csytrs.f csytrs2.f
|
||||
csyconv.f csyconvf.f csyconvf_rook.f
|
||||
csytf2_rook.f csytrf_rook.f csytrs_rook.f
|
||||
csytri_rook.f csycon_rook.f csysv_rook.f
|
||||
csytf2_rk.f csytrf_rk.f csytrf_aa.f csytrf_aa_2stage.f csytrs_3.f csytrs_aa.f csytrs_aa_2stage.f
|
||||
csytri_3.f csytri_3x.f csycon_3.f csysv_rk.f csysv_aa.f csysv_aa_2stage.f
|
||||
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
|
||||
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
|
||||
ctprfs.f ctptri.f
|
||||
ctptrs.f ctrcon.f ctrevc.f ctrevc3.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
|
||||
ctrsyl.f ctrtrs.f ctzrzf.f cung2l.f cung2r.f
|
||||
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
|
||||
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f cunm22.f
|
||||
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
|
||||
cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
|
||||
chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
|
||||
ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
|
||||
cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
|
||||
cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
|
||||
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
|
||||
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
|
||||
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f
|
||||
cgelqt.f cgelqt3.f cgemlqt.f
|
||||
cgetsls.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f
|
||||
cgelq.f claswlq.f clamswlq.f cgemlq.f
|
||||
ctplqt.f ctplqt2.f ctpmlqt.f
|
||||
chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f
|
||||
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
|
||||
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f)
|
||||
|
||||
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
|
||||
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
|
||||
csysvxx.f csyrfsx.f cla_syrfsx_extended.f cla_syamv.f
|
||||
cla_syrcond_c.f cla_syrcond_x.f cla_syrpvgrw.f
|
||||
cposvxx.f cporfsx.f cla_porfsx_extended.f
|
||||
cla_porcond_c.f cla_porcond_x.f cla_porpvgrw.f
|
||||
cgbsvxx.f cgbrfsx.f cla_gbrfsx_extended.f cla_gbamv.f
|
||||
cla_gbrcond_c.f cla_gbrcond_x.f cla_gbrpvgrw.f
|
||||
chesvxx.f cherfsx.f cla_herfsx_extended.f cla_heamv.f
|
||||
cla_hercond_c.f cla_hercond_x.f cla_herpvgrw.f
|
||||
cla_lin_berr.f clarscl2.f clascl2.f cla_wwaddw.f)
|
||||
|
||||
set(DLASRC
|
||||
dbdsvdx.f dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
|
||||
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
|
||||
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
|
||||
dgehd2.f dgehrd.f dgelq2.f dgelqf.f
|
||||
dgels.f dgelsd.f dgelss.f dgelsy.f dgeql2.f dgeqlf.f
|
||||
dgeqp3.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f dgerq2.f dgerqf.f
|
||||
dgesc2.f dgesdd.f dgesvd.f dgesvdx.f dgesvx.f dgetc2.f
|
||||
dgetrf2.f dgetri.f
|
||||
dggbak.f dggbal.f
|
||||
dgges.f dgges3.f dggesx.f dggev.f dggev3.f dggevx.f
|
||||
dggglm.f dgghrd.f dgghd3.f dgglse.f dggqrf.f
|
||||
dggrqf.f dggsvd3.f dggsvp3.f dgtcon.f dgtrfs.f dgtsv.f
|
||||
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
|
||||
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
|
||||
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
|
||||
dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
|
||||
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
|
||||
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
|
||||
dlapll.f dlapmt.f
|
||||
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
|
||||
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
|
||||
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
|
||||
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
|
||||
dlargv.f dlarrv.f dlartv.f
|
||||
dlarz.f dlarzb.f dlarzt.f dlasy2.f
|
||||
dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f
|
||||
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f
|
||||
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
|
||||
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
|
||||
dorgrq.f dorgtr.f dorm2l.f dorm2r.f dorm22.f
|
||||
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
|
||||
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
|
||||
dpbstf.f dpbsv.f dpbsvx.f
|
||||
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
|
||||
dposvx.f dpotrf2.f dpotri.f dpotrs.f dpstrf.f dpstf2.f
|
||||
dppcon.f dppequ.f
|
||||
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
|
||||
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
|
||||
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
|
||||
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
|
||||
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
|
||||
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
|
||||
dstevx.f dsycon.f dsyev.f dsyevd.f dsyevr.f
|
||||
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
|
||||
dsysv.f dsysvx.f
|
||||
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytrs.f dsytrs2.f
|
||||
dsytri2.f dsytri2x.f dsyswapr.f
|
||||
dsyconv.f dsyconvf.f dsyconvf_rook.f
|
||||
dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
|
||||
dsytri_rook.f dsycon_rook.f dsysv_rook.f
|
||||
dsytf2_rk.f dsytrf_rk.f dsytrs_3.f
|
||||
dsytri_3.f dsytri_3x.f dsycon_3.f dsysv_rk.f
|
||||
dsysv_aa.f dsysv_aa_2stage.f dsytrf_aa.f dsytrf_aa_2stage.f dsytrs_aa.f dsytrs_aa_2stage.f
|
||||
dtbcon.f
|
||||
dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
|
||||
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
|
||||
dtptrs.f
|
||||
dtrcon.f dtrevc.f dtrevc3.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
|
||||
dtrtrs.f dtzrzf.f dstemr.f
|
||||
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
|
||||
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
|
||||
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
|
||||
dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f
|
||||
dgeequb.f dsyequb.f dpoequb.f dgbequb.f
|
||||
dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
|
||||
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
|
||||
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
|
||||
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f
|
||||
dgelqt.f dgelqt3.f dgemlqt.f
|
||||
dgetsls.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f
|
||||
dgelq.f dlaswlq.f dlamswlq.f dgemlq.f
|
||||
dtplqt.f dtplqt2.f dtpmlqt.f
|
||||
dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f
|
||||
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
|
||||
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f)
|
||||
|
||||
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
|
||||
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
|
||||
dla_syrfsx_extended.f dla_syamv.f dla_syrcond.f dla_syrpvgrw.f
|
||||
dposvxx.f dporfsx.f dla_porfsx_extended.f dla_porcond.f
|
||||
dla_porpvgrw.f dgbsvxx.f dgbrfsx.f dla_gbrfsx_extended.f
|
||||
dla_gbamv.f dla_gbrcond.f dla_gbrpvgrw.f dla_lin_berr.f dlarscl2.f
|
||||
dlascl2.f dla_wwaddw.f)
|
||||
|
||||
set(ZLASRC
|
||||
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
|
||||
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
|
||||
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
|
||||
zgehd2.f zgehrd.f zgelq2.f zgelqf.f
|
||||
zgels.f zgelsd.f zgelss.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
|
||||
zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
|
||||
zgesc2.f zgesdd.f zgesvd.f zgesvdx.f zgesvx.f
|
||||
zgesvj.f zgejsv.f zgsvj0.f zgsvj1.f
|
||||
zgetc2.f zgetrf2.f
|
||||
zgetri.f
|
||||
zggbak.f zggbal.f
|
||||
zgges.f zgges3.f zggesx.f zggev.f zggev3.f zggevx.f
|
||||
zggglm.f zgghrd.f zgghd3.f zgglse.f zggqrf.f zggrqf.f
|
||||
zggsvd3.f zggsvp3.f
|
||||
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
|
||||
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
|
||||
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
|
||||
zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f
|
||||
zhetf2.f zhetrd.f
|
||||
zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
|
||||
zhetrs.f zhetrs2.f
|
||||
zhetf2_rook.f zhetrf_rook.f zhetri_rook.f
|
||||
zhetrs_rook.f zhecon_rook.f zhesv_rook.f
|
||||
zhetf2_rk.f zhetrf_rk.f zhetri_3.f zhetri_3x.f
|
||||
zhetrs_3.f zhecon_3.f zhesv_rk.f
|
||||
zhesv_aa.f zhesv_aa_2stage.f zhetrf_aa.f zhetrf_aa_2stage.f zhetrs_aa.f zhetrs_aa_2stage.f
|
||||
zhgeqz.f zhpcon.f zhpev.f zhpevd.f
|
||||
zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
|
||||
zhpsvx.f
|
||||
zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
|
||||
zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
|
||||
zlaed0.f zlaed7.f zlaed8.f
|
||||
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
|
||||
zlahef.f zlahef_rook.f zlahef_rk.f zlahef_aa.f zlahqr.f
|
||||
zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
|
||||
zlangt.f zlanhb.f
|
||||
zlanhe.f
|
||||
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
|
||||
zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
|
||||
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
|
||||
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
|
||||
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
|
||||
zlarcm.f zlarf.f zlarfb.f
|
||||
zlarfg.f zlarfgp.f zlarft.f
|
||||
zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
|
||||
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
|
||||
zlassq.f zlasyf.f zlasyf_rook.f zlasyf_rk.f zlasyf_aa.f
|
||||
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f
|
||||
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
|
||||
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
|
||||
zposv.f zposvx.f zpotrf2.f zpotri.f zpotrs.f zpstrf.f zpstf2.f
|
||||
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
|
||||
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
|
||||
zrot.f zspcon.f zsprfs.f zspsv.f
|
||||
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
|
||||
zstegr.f zstein.f zsteqr.f zsycon.f
|
||||
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f
|
||||
zsytri2.f zsytri2x.f zsyswapr.f
|
||||
zsytrs.f zsytrs2.f
|
||||
zsyconv.f zsyconvf.f zsyconvf_rook.f
|
||||
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f zsytrs_aa.f zsytrs_aa_2stage.f
|
||||
zsytri_rook.f zsycon_rook.f zsysv_rook.f
|
||||
zsytf2_rk.f zsytrf_rk.f zsytrf_aa.f zsytrf_aa_2stage.f zsytrs_3.f
|
||||
zsytri_3.f zsytri_3x.f zsycon_3.f zsysv_rk.f zsysv_aa.f zsysv_aa_2stage.f
|
||||
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
|
||||
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
|
||||
ztprfs.f ztptri.f
|
||||
ztptrs.f ztrcon.f ztrevc.f ztrevc3.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
|
||||
ztrsyl.f ztrtrs.f ztzrzf.f zung2l.f
|
||||
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
|
||||
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f zunm22.f
|
||||
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
|
||||
zunmtr.f zupgtr.f
|
||||
zupmtr.f izmax1.f dzsum1.f zstemr.f
|
||||
zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
|
||||
zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
|
||||
ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
|
||||
zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
|
||||
zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
|
||||
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
|
||||
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
|
||||
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f
|
||||
ztplqt.f ztplqt2.f ztpmlqt.f
|
||||
zgelqt.f zgelqt3.f zgemlqt.f
|
||||
zgetsls.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f
|
||||
zgelq.f zlaswlq.f zlamswlq.f zgemlq.f
|
||||
zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f
|
||||
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
|
||||
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f)
|
||||
|
||||
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
|
||||
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f
|
||||
zla_syrfsx_extended.f zla_syamv.f zla_syrcond_c.f zla_syrcond_x.f
|
||||
zla_syrpvgrw.f zposvxx.f zporfsx.f zla_porfsx_extended.f
|
||||
zla_porcond_c.f zla_porcond_x.f zla_porpvgrw.f zgbsvxx.f zgbrfsx.f
|
||||
zla_gbrfsx_extended.f zla_gbamv.f zla_gbrcond_c.f zla_gbrcond_x.f
|
||||
zla_gbrpvgrw.f zhesvxx.f zherfsx.f zla_herfsx_extended.f
|
||||
zla_heamv.f zla_hercond_c.f zla_hercond_x.f zla_herpvgrw.f
|
||||
zla_lin_berr.f zlarscl2.f zlascl2.f zla_wwaddw.f)
|
||||
|
||||
|
||||
if(USE_XBLAS)
|
||||
set(ALLXOBJ ${SXLASRC} ${DXLASRC} ${CXLASRC} ${ZXLASRC})
|
||||
endif()
|
||||
|
||||
list(APPEND SLASRC DEPRECATED/sgegs.f DEPRECATED/sgegv.f
|
||||
DEPRECATED/sgeqpf.f DEPRECATED/sgelsx.f DEPRECATED/sggsvd.f
|
||||
DEPRECATED/sggsvp.f DEPRECATED/slahrd.f DEPRECATED/slatzm.f DEPRECATED/stzrqf.f)
|
||||
list(APPEND DLASRC DEPRECATED/dgegs.f DEPRECATED/dgegv.f
|
||||
DEPRECATED/dgeqpf.f DEPRECATED/dgelsx.f DEPRECATED/dggsvd.f
|
||||
DEPRECATED/dggsvp.f DEPRECATED/dlahrd.f DEPRECATED/dlatzm.f DEPRECATED/dtzrqf.f)
|
||||
list(APPEND CLASRC DEPRECATED/cgegs.f DEPRECATED/cgegv.f
|
||||
DEPRECATED/cgeqpf.f DEPRECATED/cgelsx.f DEPRECATED/cggsvd.f
|
||||
DEPRECATED/cggsvp.f DEPRECATED/clahrd.f DEPRECATED/clatzm.f DEPRECATED/ctzrqf.f)
|
||||
list(APPEND ZLASRC DEPRECATED/zgegs.f DEPRECATED/zgegv.f
|
||||
DEPRECATED/zgeqpf.f DEPRECATED/zgelsx.f DEPRECATED/zggsvd.f
|
||||
DEPRECATED/zggsvp.f DEPRECATED/zlahrd.f DEPRECATED/zlatzm.f DEPRECATED/ztzrqf.f)
|
||||
message(STATUS "Building deprecated routines")
|
||||
|
||||
set(DSLASRC spotrs.f)
|
||||
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
|
||||
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
|
||||
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
|
||||
DEPRECATED/cgegs.f DEPRECATED/cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
|
||||
cgels.f cgelsd.f cgelss.f DEPRECATED/cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
|
||||
DEPRECATED/cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
|
||||
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
|
||||
cgesvx.f cgetc2.f cgetri.f
|
||||
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
|
||||
cgghrd.f cgglse.f cggqrf.f cggrqf.f
|
||||
DEPRECATED/cggsvd.f DEPRECATED/cggsvp.f
|
||||
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
|
||||
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
|
||||
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
|
||||
chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
|
||||
chetf2.f chetrd.f
|
||||
chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
|
||||
chetrs.f chetrs2.f
|
||||
chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f
|
||||
chgeqz.f chpcon.f chpev.f chpevd.f
|
||||
chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
|
||||
chpsvx.f
|
||||
chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
|
||||
clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
|
||||
claed0.f claed7.f claed8.f
|
||||
claein.f claesy.f claev2.f clags2.f clagtm.f
|
||||
clahef.f clahef_rook.f clahqr.f
|
||||
DEPRECATED/clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
|
||||
clanhb.f clanhe.f
|
||||
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
|
||||
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
|
||||
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
|
||||
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
|
||||
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
|
||||
clarf.f clarfb.f clarfg.f clarft.f clarfgp.f
|
||||
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
|
||||
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
|
||||
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
|
||||
DEPRECATED/clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
|
||||
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
|
||||
cposv.f cposvx.f cpstrf.f cpstf2.f
|
||||
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
|
||||
cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
|
||||
crot.f cspcon.f csprfs.f cspsv.f
|
||||
cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
|
||||
cstegr.f cstein.f csteqr.f
|
||||
csycon.f
|
||||
csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f
|
||||
csyswapr.f csytrs.f csytrs2.f csyconv.f
|
||||
csytf2_rook.f csytrf_rook.f csytrs_rook.f
|
||||
csytri_rook.f csycon_rook.f csysv_rook.f
|
||||
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
|
||||
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
|
||||
ctprfs.f ctptri.f
|
||||
ctptrs.f ctrcon.f ctrevc.f ctrevc3.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
|
||||
ctrsyl.f ctrtrs.f DEPRECATED/ctzrqf.f ctzrzf.f cung2l.f cung2r.f
|
||||
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
|
||||
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
|
||||
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
|
||||
cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
|
||||
chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
|
||||
ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
|
||||
cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
|
||||
cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
|
||||
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
|
||||
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
|
||||
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
|
||||
cgelq.f cgelqt.f cgelqt3.f cgemlq.f cgemlqt.f cgemqr.f cgeqr.f cgetsls.f
|
||||
clamswlq.f clamtsqr.f claswlq.f clatsqr.f ctplqt.f ctplqt2.f ctpmlqt.f
|
||||
chesv_aa.f chetrf_aa.f chetrs_aa.f clahef_aa.f csytf2_rk.f clasyf_rk.f
|
||||
csytrf_rk.f csytrs_3.f csycon_3.f csytri_3.f csytri_3x.f csysv_rk.f
|
||||
chetf2_rk.f clahef_rk.f chetrf_rk.f chetrs_3.f checon_3.f chetri_3.f
|
||||
chetri_3x.f chesv_rk.f chb2st_kernels.f chbev_2stage.f chbevd_2stage.f
|
||||
chbevx_2stage.f cheev_2stage.f cheevd_2stage.f cheevr_2stage.f cheevx_2stage.f
|
||||
chegv_2stage.f chetrd_2stage.f chetrd_hb2st.F chetrd_he2hb.f clarfy.f
|
||||
)
|
||||
|
||||
set(ZCLASRC cpotrs.f)
|
||||
|
||||
set(SCATGEN slatm1.f slaran.f slarnd.f)
|
||||
set(DLASRC
|
||||
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
|
||||
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
|
||||
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
|
||||
DEPRECATED/dgegs.f DEPRECATED/dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
|
||||
dgels.f dgelsd.f dgelss.f DEPRECATED/dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
|
||||
dgeqp3.f DEPRECATED/dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
|
||||
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvdx.f dgesvx.f
|
||||
dgetc2.f dgetri.f dgetrf2.f
|
||||
dggbak.f dggbal.f dgges.f dgges3.f dggesx.f dggev.f dggev3.f dggevx.f
|
||||
dggglm.f dgghd3.f dgghrd.f dgglse.f dggqrf.f
|
||||
dggrqf.f dggsvd3.f dggsvp3.f DEPRECATED/dggsvd.f DEPRECATED/dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
|
||||
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
|
||||
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
|
||||
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
|
||||
DEPRECATED/dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
|
||||
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
|
||||
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
|
||||
dlapll.f dlapmt.f
|
||||
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
|
||||
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
|
||||
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
|
||||
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
|
||||
dlargv.f dlarrv.f dlartv.f
|
||||
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
|
||||
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f DEPRECATED/dlatzm.f
|
||||
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
|
||||
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
|
||||
dorgrq.f dorgtr.f dorm2l.f dorm2r.f dorm22.f
|
||||
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
|
||||
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
|
||||
dpbstf.f dpbsv.f dpbsvx.f
|
||||
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
|
||||
dposvx.f dpotrf2.f dpotrs.f dpstrf.f dpstf2.f
|
||||
dppcon.f dppequ.f
|
||||
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
|
||||
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
|
||||
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
|
||||
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
|
||||
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
|
||||
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
|
||||
dstevx.f
|
||||
dsycon.f dsyev.f dsyevd.f dsyevr.f
|
||||
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
|
||||
dsysv.f dsysvx.f
|
||||
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f
|
||||
dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f
|
||||
dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
|
||||
dsytri_rook.f dsycon_rook.f dsysv_rook.f
|
||||
dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
|
||||
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
|
||||
dtptrs.f
|
||||
dtrcon.f dtrevc.f dtrevc3.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
|
||||
dtrtrs.f DEPRECATED/dtzrqf.f dtzrzf.f dstemr.f
|
||||
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
|
||||
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
|
||||
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
|
||||
dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f
|
||||
dgeequb.f dsyequb.f dpoequb.f dgbequb.f
|
||||
dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
|
||||
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
|
||||
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
|
||||
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
|
||||
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f dgetsls.f
|
||||
dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f dtplqt2.f dtpmlqt.f
|
||||
dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f dsytf2_rk.f dlasyf_rk.f
|
||||
dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f dsytri_3x.f dsysv_rk.f
|
||||
dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f dsbevx_2stage.f
|
||||
dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f dsyevx_2stage.f
|
||||
dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F dsytrd_sy2sb.f dlarfy.f
|
||||
)
|
||||
|
||||
set(SMATGEN slatms.f slatme.f slatmr.f slatmt.f
|
||||
slagge.f slagsy.f slakf2.f slarge.f slaror.f slarot.f slatm2.f
|
||||
slatm3.f slatm5.f slatm6.f slatm7.f slahilb.f)
|
||||
set(ZLASRC
|
||||
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
|
||||
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
|
||||
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
|
||||
DEPRECATED/zgegs.f DEPRECATED/zgegv.f zgehd2.f zgehrd.f zgejsv.f zgelq2.f zgelqf.f
|
||||
zgels.f zgelsd.f zgelss.f DEPRECATED/zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
|
||||
DEPRECATED/zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
|
||||
zgesc2.f zgesdd.f zgesvd.f zgesvdx.f zgesvj.f zgesvx.f zgetc2.f
|
||||
zgetri.f zgetrf2.f
|
||||
zggbak.f zggbal.f zgges.f zgges3.f zggesx.f zggev.f zggev3.f zggevx.f zggglm.f
|
||||
zgghd3.f zgghrd.f zgglse.f zggqrf.f zggrqf.f
|
||||
DEPRECATED/zggsvd.f zggsvd3.f DEPRECATED/zggsvp.f zggsvp3.f
|
||||
zgsvj0.f zgsvj1.f
|
||||
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
|
||||
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
|
||||
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
|
||||
zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f
|
||||
zhetf2.f zhetrd.f
|
||||
zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
|
||||
zhetrs.f zhetrs2.f
|
||||
zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f
|
||||
zhgeqz.f zhpcon.f zhpev.f zhpevd.f
|
||||
zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
|
||||
zhpsvx.f
|
||||
zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
|
||||
zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
|
||||
zlaed0.f zlaed7.f zlaed8.f
|
||||
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
|
||||
zlahef.f zlahef_rook.f zlahqr.f
|
||||
DEPRECATED/zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
|
||||
zlangt.f zlanhb.f
|
||||
zlanhe.f
|
||||
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
|
||||
zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
|
||||
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
|
||||
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
|
||||
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
|
||||
zlarcm.f zlarf.f zlarfb.f
|
||||
zlarfg.f zlarft.f zlarfgp.f
|
||||
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
|
||||
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
|
||||
zlassq.f zlasyf.f zlasyf_rook.f zlasyf_aa.f
|
||||
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f DEPRECATED/zlatzm.f
|
||||
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
|
||||
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
|
||||
zposv.f zposvx.f zpotrf2.f zpotrs.f zpstrf.f zpstf2.f
|
||||
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
|
||||
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
|
||||
zrot.f zspcon.f zsprfs.f zspsv.f
|
||||
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
|
||||
zstegr.f zstein.f zsteqr.f
|
||||
zsycon.f zsysv_aa.f
|
||||
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
|
||||
zsyswapr.f zsytrs.f zsytrs_aa.f zsytrs2.f zsyconv.f
|
||||
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
|
||||
zsytri_rook.f zsycon_rook.f zsysv_rook.f
|
||||
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
|
||||
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
|
||||
ztprfs.f ztptri.f
|
||||
ztptrs.f ztrcon.f ztrevc.f ztrevc3.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
|
||||
ztrsyl.f ztrtrs.f DEPRECATED/ztzrqf.f ztzrzf.f zung2l.f
|
||||
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
|
||||
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunm22.f zunml2.f
|
||||
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
|
||||
zunmtr.f zupgtr.f
|
||||
zupmtr.f izmax1.f dzsum1.f zstemr.f
|
||||
zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
|
||||
zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
|
||||
ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
|
||||
zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
|
||||
zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
|
||||
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
|
||||
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
|
||||
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
|
||||
zgelq.f zgelqt.f zgelqt3.f zgemlq.f zgemlqt.f zgemqr.f zgeqr.f zgetsls.f
|
||||
zlamswlq.f zlamtsqr.f zlaswlq.f zlatsqr.f ztplqt.f ztplqt2.f ztpmlqt.f
|
||||
zhesv_aa.f zhetrf_aa.f zhetrs_aa.f zlahef_aa.f zsytf2_rk.f zlasyf_rk.f
|
||||
zsytrf_aa.f zsytrf_rk.f zsytrs_3.f zsycon_3.f zsytri_3.f zsytri_3x.f zsysv_rk.f
|
||||
zhetf2_rk.f zlahef_rk.f zhetrf_rk.f zhetrs_3.f zhecon_3.f zhetri_3.f
|
||||
zhetri_3x.f zhesv_rk.f zhb2st_kernels.f zhbev_2stage.f zhbevd_2stage.f
|
||||
zhbevx_2stage.f zheev_2stage.f zheevd_2stage.f zheevr_2stage.f
|
||||
zheevx_2stage.f zhegv_2stage.f zhetrd_2stage.f zhetrd_hb2st.F zhetrd_he2hb.f
|
||||
zlarfy.f
|
||||
)
|
||||
|
||||
set(CMATGEN clatms.f clatme.f clatmr.f clatmt.f
|
||||
clagge.f claghe.f clagsy.f clakf2.f clarge.f claror.f clarot.f
|
||||
clatm1.f clarnd.f clatm2.f clatm3.f clatm5.f clatm6.f clahilb.f slatm7.f)
|
||||
set(LA_REL_SRC ${ALLAUX})
|
||||
if (BUILD_SINGLE)
|
||||
list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX})
|
||||
endif ()
|
||||
|
||||
set(DZATGEN dlatm1.f dlaran.f dlarnd.f)
|
||||
if (BUILD_DOUBLE)
|
||||
list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX})
|
||||
endif ()
|
||||
|
||||
set(DMATGEN dlatms.f dlatme.f dlatmr.f dlatmt.f
|
||||
dlagge.f dlagsy.f dlakf2.f dlarge.f dlaror.f dlarot.f dlatm2.f
|
||||
dlatm3.f dlatm5.f dlatm6.f dlatm7.f dlahilb.f)
|
||||
if (BUILD_COMPLEX)
|
||||
list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX})
|
||||
endif ()
|
||||
|
||||
set(ZMATGEN zlatms.f zlatme.f zlatmr.f zlatmt.f
|
||||
zlagge.f zlaghe.f zlagsy.f zlakf2.f zlarge.f zlaror.f zlarot.f
|
||||
zlatm1.f zlarnd.f zlatm2.f zlatm3.f zlatm5.f zlatm6.f zlahilb.f dlatm7.f)
|
||||
|
||||
if(BUILD_SINGLE)
|
||||
set(LA_REL_SRC ${SLASRC} ${DSLASRC} ${ALLAUX} ${SCLAUX})
|
||||
set(LA_GEN_SRC ${SMATGEN} ${SCATGEN})
|
||||
message(STATUS "Building Single Precision")
|
||||
endif()
|
||||
if(BUILD_DOUBLE)
|
||||
set(LA_REL_SRC ${LA_REL_SRC} ${DLASRC} ${DSLASRC} ${ALLAUX} ${DZLAUX})
|
||||
set(LA_GEN_SRC ${LA_GEN_SRC} ${DMATGEN} ${DZATGEN})
|
||||
message(STATUS "Building Double Precision")
|
||||
endif()
|
||||
if(BUILD_COMPLEX)
|
||||
set(LA_REL_SRC ${LA_REL_SRC} ${CLASRC} ${ZCLASRC} ${ALLAUX} ${SCLAUX})
|
||||
SET(LA_GEN_SRC ${LA_GEN_SRC} ${CMATGEN} ${SCATGEN})
|
||||
message(STATUS "Building Complex Precision")
|
||||
endif()
|
||||
if(BUILD_COMPLEX16)
|
||||
set(LA_REL_SRC ${LA_REL_SRC} ${ZLASRC} ${ZCLASRC} ${ALLAUX} ${DZLAUX})
|
||||
SET(LA_GEN_SRC ${LA_GEN_SRC} ${ZMATGEN} ${DZATGEN})
|
||||
message(STATUS "Building Double Complex Precision")
|
||||
endif()
|
||||
if (BUILD_COMPLEX16)
|
||||
list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX})
|
||||
endif ()
|
||||
|
||||
# add lapack-netlib folder to the sources
|
||||
set(LA_SOURCES "")
|
||||
foreach (LA_FILE ${LA_REL_SRC})
|
||||
list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}")
|
||||
endforeach ()
|
||||
foreach (LA_FILE ${LA_GEN_SRC})
|
||||
list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/TESTING/MATGEN/${LA_FILE}")
|
||||
endforeach ()
|
||||
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
set(CSRC
|
||||
set(C_SRC
|
||||
lapacke_cbbcsd.c
|
||||
lapacke_cbbcsd_work.c
|
||||
lapacke_cbdsqr.c
|
||||
@@ -46,8 +46,6 @@ set(CSRC
|
||||
lapacke_cgehrd_work.c
|
||||
lapacke_cgejsv.c
|
||||
lapacke_cgejsv_work.c
|
||||
lapacke_cgelq.c
|
||||
lapacke_cgelq_work.c
|
||||
lapacke_cgelq2.c
|
||||
lapacke_cgelq2_work.c
|
||||
lapacke_cgelqf.c
|
||||
@@ -62,18 +60,12 @@ set(CSRC
|
||||
lapacke_cgelsy_work.c
|
||||
lapacke_cgemqr.c
|
||||
lapacke_cgemqr_work.c
|
||||
lapacke_cgemlq.c
|
||||
lapacke_cgemlq_work.c
|
||||
lapacke_cgemqrt.c
|
||||
lapacke_cgemqrt_work.c
|
||||
lapacke_cgeqlf.c
|
||||
lapacke_cgeqlf_work.c
|
||||
lapacke_cgeqp3.c
|
||||
lapacke_cgeqp3_work.c
|
||||
lapacke_cgeqpf.c
|
||||
lapacke_cgeqpf_work.c
|
||||
lapacke_cgeqr.c
|
||||
lapacke_cgeqr_work.c
|
||||
lapacke_cgeqr2.c
|
||||
lapacke_cgeqr2_work.c
|
||||
lapacke_cgeqrf.c
|
||||
@@ -142,12 +134,8 @@ set(CSRC
|
||||
lapacke_cggqrf_work.c
|
||||
lapacke_cggrqf.c
|
||||
lapacke_cggrqf_work.c
|
||||
lapacke_cggsvd.c
|
||||
lapacke_cggsvd_work.c
|
||||
lapacke_cggsvd3.c
|
||||
lapacke_cggsvd3_work.c
|
||||
lapacke_cggsvp.c
|
||||
lapacke_cggsvp_work.c
|
||||
lapacke_cggsvp3.c
|
||||
lapacke_cggsvp3_work.c
|
||||
lapacke_cgtcon.c
|
||||
@@ -222,8 +210,6 @@ set(CSRC
|
||||
lapacke_chesv_work.c
|
||||
lapacke_chesv_aa.c
|
||||
lapacke_chesv_aa_work.c
|
||||
lapacke_chesv_aa_2stage.c
|
||||
lapacke_chesv_aa_2stage_work.c
|
||||
lapacke_chesv_rk.c
|
||||
lapacke_chesv_rk_work.c
|
||||
lapacke_chesvx.c
|
||||
@@ -238,8 +224,6 @@ set(CSRC
|
||||
lapacke_chetrf_rook_work.c
|
||||
lapacke_chetrf_aa.c
|
||||
lapacke_chetrf_aa_work.c
|
||||
lapacke_chetrf_aa_2stage.c
|
||||
lapacke_chetrf_aa_2stage_work.c
|
||||
lapacke_chetrf_rk.c
|
||||
lapacke_chetrf_rk_work.c
|
||||
lapacke_chetri.c
|
||||
@@ -258,9 +242,6 @@ set(CSRC
|
||||
lapacke_chetrs_rook_work.c
|
||||
lapacke_chetrs_aa.c
|
||||
lapacke_chetrs_aa_work.c
|
||||
lapacke_chetrs_aa_2stage.c
|
||||
lapacke_chetrs_aa_2stage_work.c
|
||||
lapacke_chetrf_rk.c
|
||||
lapacke_chetrs_3.c
|
||||
lapacke_chetrs_3_work.c
|
||||
lapacke_chfrk.c
|
||||
@@ -309,11 +290,6 @@ set(CSRC
|
||||
lapacke_clacp2_work.c
|
||||
lapacke_clacpy.c
|
||||
lapacke_clacpy_work.c
|
||||
lapacke_clacrm.c
|
||||
lapacke_clacrm_work.c
|
||||
lapacke_clarcm.c
|
||||
lapacke_clarcm_work.c
|
||||
lapacke_clacn2.c
|
||||
lapacke_clag2z.c
|
||||
lapacke_clag2z_work.c
|
||||
lapacke_clange.c
|
||||
@@ -342,8 +318,6 @@ set(CSRC
|
||||
lapacke_clascl_work.c
|
||||
lapacke_claset.c
|
||||
lapacke_claset_work.c
|
||||
lapacke_classq.c
|
||||
lapacke_classq_work.c
|
||||
lapacke_claswp.c
|
||||
lapacke_claswp_work.c
|
||||
lapacke_clauum.c
|
||||
@@ -462,8 +436,6 @@ set(CSRC
|
||||
lapacke_csysv_work.c
|
||||
lapacke_csysv_aa.c
|
||||
lapacke_csysv_aa_work.c
|
||||
lapacke_csysv_aa_2stage.c
|
||||
lapacke_csysv_aa_2stage_work.c
|
||||
lapacke_csysv_rk.c
|
||||
lapacke_csysv_rk_work.c
|
||||
lapacke_csysvx.c
|
||||
@@ -476,8 +448,6 @@ set(CSRC
|
||||
lapacke_csytrf_rook_work.c
|
||||
lapacke_csytrf_aa.c
|
||||
lapacke_csytrf_aa_work.c
|
||||
lapacke_csytrf_aa_2stage.c
|
||||
lapacke_csytrf_aa_2stage_work.c
|
||||
lapacke_csytrf_rk.c
|
||||
lapacke_csytrf_rk_work.c
|
||||
lapacke_csytri.c
|
||||
@@ -496,8 +466,6 @@ set(CSRC
|
||||
lapacke_csytrs_rook_work.c
|
||||
lapacke_csytrs_aa.c
|
||||
lapacke_csytrs_aa_work.c
|
||||
lapacke_csytrs_aa_2stage.c
|
||||
lapacke_csytrs_aa_2stage_work.c
|
||||
lapacke_csytrs_3.c
|
||||
lapacke_csytrs_3_work.c
|
||||
lapacke_ctbcon.c
|
||||
@@ -665,8 +633,6 @@ set(DSRC
|
||||
lapacke_dgehrd_work.c
|
||||
lapacke_dgejsv.c
|
||||
lapacke_dgejsv_work.c
|
||||
lapacke_dgelq.c
|
||||
lapacke_dgelq_work.c
|
||||
lapacke_dgelq2.c
|
||||
lapacke_dgelq2_work.c
|
||||
lapacke_dgelqf.c
|
||||
@@ -679,8 +645,6 @@ set(DSRC
|
||||
lapacke_dgelss_work.c
|
||||
lapacke_dgelsy.c
|
||||
lapacke_dgelsy_work.c
|
||||
lapacke_dgemlq.c
|
||||
lapacke_dgemlq_work.c
|
||||
lapacke_dgemqr.c
|
||||
lapacke_dgemqr_work.c
|
||||
lapacke_dgemqrt.c
|
||||
@@ -689,10 +653,6 @@ set(DSRC
|
||||
lapacke_dgeqlf_work.c
|
||||
lapacke_dgeqp3.c
|
||||
lapacke_dgeqp3_work.c
|
||||
lapacke_dgeqpf.c
|
||||
lapacke_dgeqpf_work.c
|
||||
lapacke_dgeqr.c
|
||||
lapacke_dgeqr_work.c
|
||||
lapacke_dgeqr2.c
|
||||
lapacke_dgeqr2_work.c
|
||||
lapacke_dgeqrf.c
|
||||
@@ -761,12 +721,8 @@ set(DSRC
|
||||
lapacke_dggqrf_work.c
|
||||
lapacke_dggrqf.c
|
||||
lapacke_dggrqf_work.c
|
||||
lapacke_dggsvd.c
|
||||
lapacke_dggsvd_work.c
|
||||
lapacke_dggsvd3.c
|
||||
lapacke_dggsvd3_work.c
|
||||
lapacke_dggsvp.c
|
||||
lapacke_dggsvp_work.c
|
||||
lapacke_dggsvp3.c
|
||||
lapacke_dggsvp3_work.c
|
||||
lapacke_dgtcon.c
|
||||
@@ -829,9 +785,6 @@ set(DSRC
|
||||
lapacke_dlaset_work.c
|
||||
lapacke_dlasrt.c
|
||||
lapacke_dlasrt_work.c
|
||||
lapacke_dlassq.c
|
||||
lapacke_dlassq_work.c
|
||||
lapacke_dlaswp.c
|
||||
lapacke_dlaswp.c
|
||||
lapacke_dlaswp_work.c
|
||||
lapacke_dlauum.c
|
||||
@@ -1072,8 +1025,6 @@ set(DSRC
|
||||
lapacke_dsysv_work.c
|
||||
lapacke_dsysv_aa.c
|
||||
lapacke_dsysv_aa_work.c
|
||||
lapacke_dsysv_aa_2stage.c
|
||||
lapacke_dsysv_aa_2stage_work.c
|
||||
lapacke_dsysv_rk.c
|
||||
lapacke_dsysv_rk_work.c
|
||||
lapacke_dsysvx.c
|
||||
@@ -1088,8 +1039,6 @@ set(DSRC
|
||||
lapacke_dsytrf_rook_work.c
|
||||
lapacke_dsytrf_aa.c
|
||||
lapacke_dsytrf_aa_work.c
|
||||
lapacke_dsytrf_aa_2stage.c
|
||||
lapacke_dsytrf_aa_2stage_work.c
|
||||
lapacke_dsytrf_rk.c
|
||||
lapacke_dsytrf_rk_work.c
|
||||
lapacke_dsytri.c
|
||||
@@ -1106,8 +1055,6 @@ set(DSRC
|
||||
lapacke_dsytrs2_work.c
|
||||
lapacke_dsytrs_aa.c
|
||||
lapacke_dsytrs_aa_work.c
|
||||
lapacke_dsytrs_aa_2stage.c
|
||||
lapacke_dsytrs_aa_2stage_work.c
|
||||
lapacke_dsytrs_3.c
|
||||
lapacke_dsytrs_3_work.c
|
||||
lapacke_dsytrs_work.c
|
||||
@@ -1237,8 +1184,6 @@ set(SSRC
|
||||
lapacke_sgehrd_work.c
|
||||
lapacke_sgejsv.c
|
||||
lapacke_sgejsv_work.c
|
||||
lapacke_sgelq.c
|
||||
lapacke_sgelq_work.c
|
||||
lapacke_sgelq2.c
|
||||
lapacke_sgelq2_work.c
|
||||
lapacke_sgelqf.c
|
||||
@@ -1251,8 +1196,6 @@ set(SSRC
|
||||
lapacke_sgelss_work.c
|
||||
lapacke_sgelsy.c
|
||||
lapacke_sgelsy_work.c
|
||||
lapacke_sgemlq.c
|
||||
lapacke_sgemlq_work.c
|
||||
lapacke_sgemqr.c
|
||||
lapacke_sgemqr_work.c
|
||||
lapacke_sgemqrt.c
|
||||
@@ -1261,10 +1204,6 @@ set(SSRC
|
||||
lapacke_sgeqlf_work.c
|
||||
lapacke_sgeqp3.c
|
||||
lapacke_sgeqp3_work.c
|
||||
lapacke_sgeqpf.c
|
||||
lapacke_sgeqpf_work.c
|
||||
lapacke_sgeqr.c
|
||||
lapacke_sgeqr_work.c
|
||||
lapacke_sgeqr2.c
|
||||
lapacke_sgeqr2_work.c
|
||||
lapacke_sgeqrf.c
|
||||
@@ -1333,12 +1272,8 @@ set(SSRC
|
||||
lapacke_sggqrf_work.c
|
||||
lapacke_sggrqf.c
|
||||
lapacke_sggrqf_work.c
|
||||
lapacke_sggsvd.c
|
||||
lapacke_sggsvd_work.c
|
||||
lapacke_sggsvd3.c
|
||||
lapacke_sggsvd3_work.c
|
||||
lapacke_sggsvp.c
|
||||
lapacke_sggsvp_work.c
|
||||
lapacke_sggsvp3.c
|
||||
lapacke_sggsvp3_work.c
|
||||
lapacke_sgtcon.c
|
||||
@@ -1401,8 +1336,6 @@ set(SSRC
|
||||
lapacke_slaset_work.c
|
||||
lapacke_slasrt.c
|
||||
lapacke_slasrt_work.c
|
||||
lapacke_slassq.c
|
||||
lapacke_slassq_work.c
|
||||
lapacke_slaswp.c
|
||||
lapacke_slaswp_work.c
|
||||
lapacke_slauum.c
|
||||
@@ -1639,8 +1572,6 @@ set(SSRC
|
||||
lapacke_ssysv_work.c
|
||||
lapacke_ssysv_aa.c
|
||||
lapacke_ssysv_aa_work.c
|
||||
lapacke_ssysv_aa_2stage.c
|
||||
lapacke_ssysv_aa_2stage_work.c
|
||||
lapacke_ssysv_rk.c
|
||||
lapacke_ssysv_rk_work.c
|
||||
lapacke_ssysvx.c
|
||||
@@ -1655,9 +1586,6 @@ set(SSRC
|
||||
lapacke_ssytrf_rook_work.c
|
||||
lapacke_ssytrf_aa.c
|
||||
lapacke_ssytrf_aa_work.c
|
||||
lapacke_ssytrf_aa_2stage.c
|
||||
lapacke_ssytrf_aa_2stage_work.c
|
||||
lapacke_ssytrf_rook.c
|
||||
lapacke_ssytrf_rk.c
|
||||
lapacke_ssytrf_rk_work.c
|
||||
lapacke_ssytri.c
|
||||
@@ -1674,8 +1602,6 @@ set(SSRC
|
||||
lapacke_ssytrs2_work.c
|
||||
lapacke_ssytrs_aa.c
|
||||
lapacke_ssytrs_aa_work.c
|
||||
lapacke_ssytrs_aa_2stage.c
|
||||
lapacke_ssytrs_aa_2stage_work.c
|
||||
lapacke_ssytrs_3.c
|
||||
lapacke_ssytrs_3_work.c
|
||||
lapacke_ssytrs_work.c
|
||||
@@ -1803,8 +1729,6 @@ set(ZSRC
|
||||
lapacke_zgehrd_work.c
|
||||
lapacke_zgejsv.c
|
||||
lapacke_zgejsv_work.c
|
||||
lapacke_zgelq.c
|
||||
lapacke_zgelq_work.c
|
||||
lapacke_zgelq2.c
|
||||
lapacke_zgelq2_work.c
|
||||
lapacke_zgelqf.c
|
||||
@@ -1817,8 +1741,6 @@ set(ZSRC
|
||||
lapacke_zgelss_work.c
|
||||
lapacke_zgelsy.c
|
||||
lapacke_zgelsy_work.c
|
||||
lapacke_zgemlq.c
|
||||
lapacke_zgemlq_work.c
|
||||
lapacke_zgemqr.c
|
||||
lapacke_zgemqr_work.c
|
||||
lapacke_zgemqrt.c
|
||||
@@ -1827,10 +1749,6 @@ set(ZSRC
|
||||
lapacke_zgeqlf_work.c
|
||||
lapacke_zgeqp3.c
|
||||
lapacke_zgeqp3_work.c
|
||||
lapacke_zgeqpf.c
|
||||
lapacke_zgeqpf_work.c
|
||||
lapacke_zgeqr.c
|
||||
lapacke_zgeqr_work.c
|
||||
lapacke_zgeqr2.c
|
||||
lapacke_zgeqr2_work.c
|
||||
lapacke_zgeqrf.c
|
||||
@@ -1899,12 +1817,8 @@ set(ZSRC
|
||||
lapacke_zggqrf_work.c
|
||||
lapacke_zggrqf.c
|
||||
lapacke_zggrqf_work.c
|
||||
lapacke_zggsvd.c
|
||||
lapacke_zggsvd_work.c
|
||||
lapacke_zggsvd3.c
|
||||
lapacke_zggsvd3_work.c
|
||||
lapacke_zggsvp.c
|
||||
lapacke_zggsvp_work.c
|
||||
lapacke_zggsvp3.c
|
||||
lapacke_zggsvp3_work.c
|
||||
lapacke_zgtcon.c
|
||||
@@ -1925,12 +1839,6 @@ set(ZSRC
|
||||
lapacke_zhbevd_work.c
|
||||
lapacke_zhbevx.c
|
||||
lapacke_zhbevx_work.c
|
||||
lapacke_zhbev_2stage.c
|
||||
lapacke_zhbev_2stage_work.c
|
||||
lapacke_zhbevd_2stage.c
|
||||
lapacke_zhbevd_2stage_work.c
|
||||
lapacke_zhbevx_2stage.c
|
||||
lapacke_zhbevx_2stage_work.c
|
||||
lapacke_zhbgst.c
|
||||
lapacke_zhbgst_work.c
|
||||
lapacke_zhbgv.c
|
||||
@@ -1979,8 +1887,6 @@ set(ZSRC
|
||||
lapacke_zhesv_work.c
|
||||
lapacke_zhesv_aa.c
|
||||
lapacke_zhesv_aa_work.c
|
||||
lapacke_zhesv_aa_2stage.c
|
||||
lapacke_zhesv_aa_2stage_work.c
|
||||
lapacke_zhesv_rk.c
|
||||
lapacke_zhesv_rk_work.c
|
||||
lapacke_zhesvx.c
|
||||
@@ -1995,8 +1901,6 @@ set(ZSRC
|
||||
lapacke_zhetrf_rook_work.c
|
||||
lapacke_zhetrf_aa.c
|
||||
lapacke_zhetrf_aa_work.c
|
||||
lapacke_zhetrf_aa_2stage.c
|
||||
lapacke_zhetrf_aa_2stage_work.c
|
||||
lapacke_zhetrf_rk.c
|
||||
lapacke_zhetrf_rk_work.c
|
||||
lapacke_zhetri.c
|
||||
@@ -2014,8 +1918,6 @@ set(ZSRC
|
||||
lapacke_zhetrs_work.c
|
||||
lapacke_zhetrs_aa.c
|
||||
lapacke_zhetrs_aa_work.c
|
||||
lapacke_zhetrs_aa_2stage.c
|
||||
lapacke_zhetrs_aa_2stage_work.c
|
||||
lapacke_zhetrs_3.c
|
||||
lapacke_zhetrs_3_work.c
|
||||
lapacke_zhetrs_rook_work.c
|
||||
@@ -2065,8 +1967,6 @@ set(ZSRC
|
||||
lapacke_zlacp2_work.c
|
||||
lapacke_zlacpy.c
|
||||
lapacke_zlacpy_work.c
|
||||
lapacke_zlacrm.c
|
||||
lapacke_zlacrm_work.c
|
||||
lapacke_zlag2c.c
|
||||
lapacke_zlag2c_work.c
|
||||
lapacke_zlange.c
|
||||
@@ -2081,8 +1981,6 @@ set(ZSRC
|
||||
lapacke_zlapmr_work.c
|
||||
lapacke_zlapmt.c
|
||||
lapacke_zlapmt_work.c
|
||||
lapacke_zlarcm.c
|
||||
lapacke_zlarcm_work.c
|
||||
lapacke_zlarfb.c
|
||||
lapacke_zlarfb_work.c
|
||||
lapacke_zlarfg.c
|
||||
@@ -2097,8 +1995,6 @@ set(ZSRC
|
||||
lapacke_zlascl_work.c
|
||||
lapacke_zlaset.c
|
||||
lapacke_zlaset_work.c
|
||||
lapacke_zlassq.c
|
||||
lapacke_zlassq_work.c
|
||||
lapacke_zlaswp.c
|
||||
lapacke_zlaswp_work.c
|
||||
lapacke_zlauum.c
|
||||
@@ -2217,8 +2113,6 @@ set(ZSRC
|
||||
lapacke_zsysv_work.c
|
||||
lapacke_zsysv_aa.c
|
||||
lapacke_zsysv_aa_work.c
|
||||
lapacke_zsysv_aa_2stage.c
|
||||
lapacke_zsysv_aa_2stage_work.c
|
||||
lapacke_zsysv_rk.c
|
||||
lapacke_zsysv_rk_work.c
|
||||
lapacke_zsysvx.c
|
||||
@@ -2231,8 +2125,6 @@ set(ZSRC
|
||||
lapacke_zsytrf_rook_work.c
|
||||
lapacke_zsytrf_aa.c
|
||||
lapacke_zsytrf_aa_work.c
|
||||
lapacke_zsytrf_aa_2stage.c
|
||||
lapacke_zsytrf_aa_2stage_work.c
|
||||
lapacke_zsytrf_rk.c
|
||||
lapacke_zsytrf_rk_work.c
|
||||
lapacke_zsytri.c
|
||||
@@ -2251,8 +2143,6 @@ set(ZSRC
|
||||
lapacke_zsytrs_rook_work.c
|
||||
lapacke_zsytrs_aa.c
|
||||
lapacke_zsytrs_aa_work.c
|
||||
lapacke_zsytrs_aa_2stage.c
|
||||
lapacke_zsytrs_aa_2stage_work.c
|
||||
lapacke_zsytrs_3.c
|
||||
lapacke_zsytrs_3_work.c
|
||||
lapacke_ztbcon.c
|
||||
@@ -2373,92 +2263,104 @@ set(ZSRC
|
||||
)
|
||||
|
||||
set(SRCX
|
||||
lapacke_cgbrfsx.c lapacke_cporfsx.c lapacke_dgerfsx.c lapacke_sgbrfsx.c lapacke_ssyrfsx.c lapacke_zherfsx.c
|
||||
lapacke_cgbrfsx_work.c lapacke_cporfsx_work.c lapacke_dgerfsx_work.c lapacke_sgbrfsx_work.c lapacke_ssyrfsx_work.c lapacke_zherfsx_work.c
|
||||
lapacke_cgerfsx.c lapacke_csyrfsx.c lapacke_dporfsx.c lapacke_sgerfsx.c lapacke_zgbrfsx.c lapacke_zporfsx.c
|
||||
lapacke_cgerfsx_work.c lapacke_csyrfsx_work.c lapacke_dporfsx_work.c lapacke_sgerfsx_work.c lapacke_zgbrfsx_work.c lapacke_zporfsx_work.c
|
||||
lapacke_cherfsx.c lapacke_dgbrfsx.c lapacke_dsyrfsx.c lapacke_sporfsx.c lapacke_zgerfsx.c lapacke_zsyrfsx.c
|
||||
lapacke_cherfsx_work.c lapacke_dgbrfsx_work.c lapacke_dsyrfsx_work.c lapacke_sporfsx_work.c lapacke_zgerfsx_work.c lapacke_zsyrfsx_work.c
|
||||
lapacke_cgbsvxx.c lapacke_cposvxx.c lapacke_dgesvxx.c lapacke_sgbsvxx.c lapacke_ssysvxx.c lapacke_zhesvxx.c
|
||||
lapacke_cgbsvxx_work.c lapacke_cposvxx_work.c lapacke_dgesvxx_work.c lapacke_sgbsvxx_work.c lapacke_ssysvxx_work.c lapacke_zhesvxx_work.c
|
||||
lapacke_cgesvxx.c lapacke_csysvxx.c lapacke_dposvxx.c lapacke_sgesvxx.c lapacke_zgbsvxx.c lapacke_zposvxx.c
|
||||
lapacke_cgesvxx_work.c lapacke_csysvxx_work.c lapacke_dposvxx_work.c lapacke_sgesvxx_work.c lapacke_zgbsvxx_work.c lapacke_zposvxx_work.c
|
||||
lapacke_chesvxx.c lapacke_dgbsvxx.c lapacke_dsysvxx.c lapacke_sposvxx.c lapacke_zgesvxx.c lapacke_zsysvxx.c
|
||||
lapacke_cgbrfsx.c lapacke_cporfsx.c lapacke_dgerfsx.c lapacke_sgbrfsx.c lapacke_ssyrfsx.c lapacke_zherfsx.c
|
||||
lapacke_cgbrfsx_work.c lapacke_cporfsx_work.c lapacke_dgerfsx_work.c lapacke_sgbrfsx_work.c lapacke_ssyrfsx_work.c lapacke_zherfsx_work.c
|
||||
lapacke_cgerfsx.c lapacke_csyrfsx.c lapacke_dporfsx.c lapacke_sgerfsx.c lapacke_zgbrfsx.c lapacke_zporfsx.c
|
||||
lapacke_cgerfsx_work.c lapacke_csyrfsx_work.c lapacke_dporfsx_work.c lapacke_sgerfsx_work.c lapacke_zgbrfsx_work.c lapacke_zporfsx_work.c
|
||||
lapacke_cherfsx.c lapacke_dgbrfsx.c lapacke_dsyrfsx.c lapacke_sporfsx.c lapacke_zgerfsx.c lapacke_zsyrfsx.c
|
||||
lapacke_cherfsx_work.c lapacke_dgbrfsx_work.c lapacke_dsyrfsx_work.c lapacke_sporfsx_work.c lapacke_zgerfsx_work.c lapacke_zsyrfsx_work.c
|
||||
lapacke_cgbsvxx.c lapacke_cposvxx.c lapacke_dgesvxx.c lapacke_sgbsvxx.c lapacke_ssysvxx.c lapacke_zhesvxx.c
|
||||
lapacke_cgbsvxx_work.c lapacke_cposvxx_work.c lapacke_dgesvxx_work.c lapacke_sgbsvxx_work.c lapacke_ssysvxx_work.c lapacke_zhesvxx_work.c
|
||||
lapacke_cgesvxx.c lapacke_csysvxx.c lapacke_dposvxx.c lapacke_sgesvxx.c lapacke_zgbsvxx.c lapacke_zposvxx.c
|
||||
lapacke_cgesvxx_work.c lapacke_csysvxx_work.c lapacke_dposvxx_work.c lapacke_sgesvxx_work.c lapacke_zgbsvxx_work.c lapacke_zposvxx_work.c
|
||||
lapacke_chesvxx.c lapacke_dgbsvxx.c lapacke_dsysvxx.c lapacke_sposvxx.c lapacke_zgesvxx.c lapacke_zsysvxx.c
|
||||
lapacke_chesvxx_work.c lapacke_dgbsvxx_work.c lapacke_dsysvxx_work.c lapacke_sposvxx_work.c lapacke_zgesvxx_work.c lapacke_zsysvxx_work.c
|
||||
)
|
||||
|
||||
|
||||
# FILE PARTS OF TMGLIB
|
||||
# FILE PARTS OF TMGLIB
|
||||
set(MATGEN
|
||||
lapacke_clatms.c
|
||||
lapacke_clatms_work.c
|
||||
lapacke_dlatms.c
|
||||
lapacke_dlatms_work.c
|
||||
lapacke_slatms.c
|
||||
lapacke_slatms_work.c
|
||||
lapacke_zlatms.c
|
||||
lapacke_zlatms_work.c
|
||||
lapacke_clagge.c
|
||||
lapacke_clagge_work.c
|
||||
lapacke_dlagge.c
|
||||
lapacke_dlagge_work.c
|
||||
lapacke_slagge.c
|
||||
lapacke_slagge_work.c
|
||||
lapacke_zlagge.c
|
||||
lapacke_zlagge_work.c
|
||||
lapacke_claghe.c
|
||||
lapacke_claghe_work.c
|
||||
lapacke_zlaghe.c
|
||||
lapacke_zlaghe_work.c
|
||||
lapacke_clagsy.c
|
||||
lapacke_clagsy_work.c
|
||||
lapacke_dlagsy.c
|
||||
lapacke_dlagsy_work.c
|
||||
lapacke_slagsy.c
|
||||
lapacke_slagsy_work.c
|
||||
lapacke_zlagsy.c
|
||||
lapacke_clatms.c
|
||||
lapacke_clatms_work.c
|
||||
lapacke_dlatms.c
|
||||
lapacke_dlatms_work.c
|
||||
lapacke_slatms.c
|
||||
lapacke_slatms_work.c
|
||||
lapacke_zlatms.c
|
||||
lapacke_zlatms_work.c
|
||||
lapacke_clagge.c
|
||||
lapacke_clagge_work.c
|
||||
lapacke_dlagge.c
|
||||
lapacke_dlagge_work.c
|
||||
lapacke_slagge.c
|
||||
lapacke_slagge_work.c
|
||||
lapacke_zlagge.c
|
||||
lapacke_zlagge_work.c
|
||||
lapacke_claghe.c
|
||||
lapacke_claghe_work.c
|
||||
lapacke_zlaghe.c
|
||||
lapacke_zlaghe_work.c
|
||||
lapacke_clagsy.c
|
||||
lapacke_clagsy_work.c
|
||||
lapacke_dlagsy.c
|
||||
lapacke_dlagsy_work.c
|
||||
lapacke_slagsy.c
|
||||
lapacke_slagsy_work.c
|
||||
lapacke_zlagsy.c
|
||||
lapacke_zlagsy_work.c
|
||||
lapacke_nancheck.c
|
||||
)
|
||||
|
||||
set(Utils_SRC
|
||||
lapacke_c_nancheck.c lapacke_ctr_trans.c lapacke_make_complex_float.c lapacke_zgb_nancheck.c
|
||||
lapacke_cgb_nancheck.c lapacke_d_nancheck.c lapacke_s_nancheck.c lapacke_zgb_trans.c
|
||||
lapacke_cgb_trans.c lapacke_dgb_nancheck.c lapacke_sgb_nancheck.c lapacke_zge_nancheck.c
|
||||
lapacke_cge_nancheck.c lapacke_dgb_trans.c lapacke_sgb_trans.c lapacke_zge_trans.c
|
||||
lapacke_cge_trans.c lapacke_dge_nancheck.c lapacke_sge_nancheck.c lapacke_zgg_nancheck.c
|
||||
lapacke_cgg_nancheck.c lapacke_dge_trans.c lapacke_sge_trans.c lapacke_zgg_trans.c
|
||||
lapacke_cgg_trans.c lapacke_dgg_nancheck.c lapacke_sgg_nancheck.c lapacke_zgt_nancheck.c
|
||||
lapacke_cgt_nancheck.c lapacke_dgg_trans.c lapacke_sgg_trans.c lapacke_zhb_nancheck.c
|
||||
lapacke_chb_nancheck.c lapacke_dgt_nancheck.c lapacke_sgt_nancheck.c lapacke_zhb_trans.c
|
||||
lapacke_chb_trans.c lapacke_dhs_nancheck.c lapacke_shs_nancheck.c lapacke_zhe_nancheck.c
|
||||
lapacke_che_nancheck.c lapacke_dhs_trans.c lapacke_shs_trans.c lapacke_zhe_trans.c
|
||||
lapacke_che_trans.c lapacke_dpb_nancheck.c lapacke_spb_nancheck.c lapacke_zhp_nancheck.c
|
||||
lapacke_chp_nancheck.c lapacke_dpb_trans.c lapacke_spb_trans.c lapacke_zhp_trans.c
|
||||
lapacke_chp_trans.c lapacke_dpf_nancheck.c lapacke_spf_nancheck.c lapacke_zhs_nancheck.c
|
||||
lapacke_chs_nancheck.c lapacke_dpf_trans.c lapacke_spf_trans.c lapacke_zhs_trans.c
|
||||
lapacke_chs_trans.c lapacke_dpo_nancheck.c lapacke_spo_nancheck.c lapacke_zpb_nancheck.c
|
||||
lapacke_cpb_nancheck.c lapacke_dpo_trans.c lapacke_spo_trans.c lapacke_zpb_trans.c
|
||||
lapacke_cpb_trans.c lapacke_dpp_nancheck.c lapacke_spp_nancheck.c lapacke_zpf_nancheck.c
|
||||
lapacke_cpf_nancheck.c lapacke_dpp_trans.c lapacke_spp_trans.c lapacke_zpf_trans.c
|
||||
lapacke_cpf_trans.c lapacke_dpt_nancheck.c lapacke_spt_nancheck.c lapacke_zpo_nancheck.c
|
||||
lapacke_cpo_nancheck.c lapacke_dsb_nancheck.c lapacke_ssb_nancheck.c lapacke_zpo_trans.c
|
||||
lapacke_cpo_trans.c lapacke_dsb_trans.c lapacke_ssb_trans.c lapacke_zpp_nancheck.c
|
||||
lapacke_cpp_nancheck.c lapacke_dsp_nancheck.c lapacke_ssp_nancheck.c lapacke_zpp_trans.c
|
||||
lapacke_cpp_trans.c lapacke_dsp_trans.c lapacke_ssp_trans.c lapacke_zpt_nancheck.c
|
||||
lapacke_cpt_nancheck.c lapacke_dst_nancheck.c lapacke_sst_nancheck.c lapacke_zsp_nancheck.c
|
||||
lapacke_csp_nancheck.c lapacke_dsy_nancheck.c lapacke_ssy_nancheck.c lapacke_zsp_trans.c
|
||||
lapacke_csp_trans.c lapacke_dsy_trans.c lapacke_ssy_trans.c lapacke_zst_nancheck.c
|
||||
lapacke_cst_nancheck.c lapacke_dtb_nancheck.c lapacke_stb_nancheck.c lapacke_zsy_nancheck.c
|
||||
lapacke_csy_nancheck.c lapacke_dtb_trans.c lapacke_stb_trans.c lapacke_zsy_trans.c
|
||||
lapacke_csy_trans.c lapacke_dtf_nancheck.c lapacke_stf_nancheck.c lapacke_ztb_nancheck.c
|
||||
lapacke_ctb_nancheck.c lapacke_dtf_trans.c lapacke_stf_trans.c lapacke_ztb_trans.c
|
||||
lapacke_ctb_trans.c lapacke_dtp_nancheck.c lapacke_stp_nancheck.c lapacke_ztf_nancheck.c
|
||||
lapacke_ctf_nancheck.c lapacke_dtp_trans.c lapacke_stp_trans.c lapacke_ztf_trans.c
|
||||
lapacke_ctf_trans.c lapacke_dtr_nancheck.c lapacke_str_nancheck.c lapacke_ztp_nancheck.c
|
||||
lapacke_ctp_nancheck.c lapacke_dtr_trans.c lapacke_str_trans.c lapacke_ztp_trans.c
|
||||
lapacke_ctp_trans.c lapacke_lsame.c lapacke_xerbla.c lapacke_ztr_nancheck.c
|
||||
lapacke_ctr_nancheck.c lapacke_make_complex_double.c lapacke_z_nancheck.c lapacke_ztr_trans.c
|
||||
lapacke_cgb_nancheck.c lapacke_dpf_nancheck.c lapacke_ssy_trans.c
|
||||
lapacke_cgb_trans.c lapacke_dpf_trans.c lapacke_stb_nancheck.c
|
||||
lapacke_cge_nancheck.c lapacke_dpo_nancheck.c lapacke_stb_trans.c
|
||||
lapacke_cge_trans.c lapacke_dpo_trans.c lapacke_stf_nancheck.c
|
||||
lapacke_cgg_nancheck.c lapacke_dpp_nancheck.c lapacke_stf_trans.c
|
||||
lapacke_cgg_trans.c lapacke_dpp_trans.c lapacke_stp_nancheck.c
|
||||
lapacke_cgt_nancheck.c lapacke_dpt_nancheck.c lapacke_stp_trans.c
|
||||
lapacke_chb_nancheck.c lapacke_dsb_nancheck.c lapacke_str_nancheck.c
|
||||
lapacke_chb_trans.c lapacke_dsb_trans.c lapacke_str_trans.c
|
||||
lapacke_che_nancheck.c lapacke_dsp_nancheck.c lapacke_xerbla.c
|
||||
lapacke_che_trans.c lapacke_dsp_trans.c lapacke_zgb_nancheck.c
|
||||
lapacke_chp_nancheck.c lapacke_dst_nancheck.c lapacke_zgb_trans.c
|
||||
lapacke_chp_trans.c lapacke_dsy_nancheck.c lapacke_zge_nancheck.c
|
||||
lapacke_chs_nancheck.c lapacke_dsy_trans.c lapacke_zge_trans.c
|
||||
lapacke_chs_trans.c lapacke_dtb_nancheck.c lapacke_zgg_nancheck.c
|
||||
lapacke_c_nancheck.c lapacke_dtb_trans.c lapacke_zgg_trans.c
|
||||
lapacke_cpb_nancheck.c lapacke_dtf_nancheck.c lapacke_zgt_nancheck.c
|
||||
lapacke_cpb_trans.c lapacke_dtf_trans.c lapacke_zhb_nancheck.c
|
||||
lapacke_cpf_nancheck.c lapacke_dtp_nancheck.c lapacke_zhb_trans.c
|
||||
lapacke_cpf_trans.c lapacke_dtp_trans.c lapacke_zhe_nancheck.c
|
||||
lapacke_cpo_nancheck.c lapacke_dtr_nancheck.c lapacke_zhe_trans.c
|
||||
lapacke_cpo_trans.c lapacke_dtr_trans.c lapacke_zhp_nancheck.c
|
||||
lapacke_cpp_nancheck.c lapacke_lsame.c lapacke_zhp_trans.c
|
||||
lapacke_cpp_trans.c lapacke_make_complex_double.c lapacke_zhs_nancheck.c
|
||||
lapacke_cpt_nancheck.c lapacke_make_complex_float.c lapacke_zhs_trans.c
|
||||
lapacke_csp_nancheck.c lapacke_sgb_nancheck.c lapacke_z_nancheck.c
|
||||
lapacke_csp_trans.c lapacke_sgb_trans.c lapacke_zpb_nancheck.c
|
||||
lapacke_cst_nancheck.c lapacke_sge_nancheck.c lapacke_zpb_trans.c
|
||||
lapacke_csy_nancheck.c lapacke_sge_trans.c lapacke_zpf_nancheck.c
|
||||
lapacke_csy_trans.c lapacke_sgg_nancheck.c lapacke_zpf_trans.c
|
||||
lapacke_ctb_nancheck.c lapacke_sgg_trans.c lapacke_zpo_nancheck.c
|
||||
lapacke_ctb_trans.c lapacke_sgt_nancheck.c lapacke_zpo_trans.c
|
||||
lapacke_ctf_nancheck.c lapacke_shs_nancheck.c lapacke_zpp_nancheck.c
|
||||
lapacke_ctf_trans.c lapacke_shs_trans.c lapacke_zpp_trans.c
|
||||
lapacke_ctp_nancheck.c lapacke_s_nancheck.c lapacke_zpt_nancheck.c
|
||||
lapacke_ctp_trans.c lapacke_spb_nancheck.c lapacke_zsp_nancheck.c
|
||||
lapacke_ctr_nancheck.c lapacke_spb_trans.c lapacke_zsp_trans.c
|
||||
lapacke_ctr_trans.c lapacke_spf_nancheck.c lapacke_zst_nancheck.c
|
||||
lapacke_dgb_nancheck.c lapacke_spf_trans.c lapacke_zsy_nancheck.c
|
||||
lapacke_dgb_trans.c lapacke_spo_nancheck.c lapacke_zsy_trans.c
|
||||
lapacke_dge_nancheck.c lapacke_spo_trans.c lapacke_ztb_nancheck.c
|
||||
lapacke_dge_trans.c lapacke_spp_nancheck.c lapacke_ztb_trans.c
|
||||
lapacke_dgg_nancheck.c lapacke_spp_trans.c lapacke_ztf_nancheck.c
|
||||
lapacke_dgg_trans.c lapacke_spt_nancheck.c lapacke_ztf_trans.c
|
||||
lapacke_dgt_nancheck.c lapacke_ssb_nancheck.c lapacke_ztp_nancheck.c
|
||||
lapacke_dhs_nancheck.c lapacke_ssb_trans.c lapacke_ztp_trans.c
|
||||
lapacke_dhs_trans.c lapacke_ssp_nancheck.c lapacke_ztr_nancheck.c
|
||||
lapacke_d_nancheck.c lapacke_ssp_trans.c lapacke_ztr_trans.c
|
||||
lapacke_dpb_nancheck.c lapacke_sst_nancheck.c
|
||||
lapacke_dpb_trans.c lapacke_ssy_nancheck.c
|
||||
)
|
||||
|
||||
set(LAPACKE_REL_SRC "")
|
||||
@@ -2478,10 +2380,6 @@ if (BUILD_COMPLEX16)
|
||||
list(APPEND LAPACKE_REL_SRC ${ZSRC})
|
||||
endif ()
|
||||
|
||||
if (BUILD_MATGEN)
|
||||
list(APPEND LAPACKE_REL_SRC ${MATGEN})
|
||||
endif ()
|
||||
|
||||
# add lapack-netlib folder to the sources
|
||||
set(LAPACKE_SOURCES "")
|
||||
foreach (LAE_FILE ${LAPACKE_REL_SRC})
|
||||
@@ -2493,6 +2391,6 @@ foreach (Utils_FILE ${Utils_SRC})
|
||||
endforeach ()
|
||||
|
||||
set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/LAPACKE/include")
|
||||
configure_file("${lapacke_include_dir}/lapacke_mangling_with_flags.h.in" "${lapacke_include_dir}/lapacke_mangling.h" COPYONLY)
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${lapacke_include_dir}/lapacke_mangling_with_flags.h.in" "${lapacke_include_dir}/lapacke_mangling.h")
|
||||
include_directories(${lapacke_include_dir})
|
||||
set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}")
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
||||
libsuffix=@SUFFIX64_UNDERSCORE@
|
||||
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
|
||||
|
||||
openblas_config=USE_64BITINT=@USE_64BITINT@ NO_CBLAS=@NO_CBLAS@ NO_LAPACK=@NO_LAPACK@ NO_LAPACKE=@NO_LAPACKE@ DYNAMIC_ARCH=@DYNAMIC_ARCH@ DYNAMIC_OLDER=@DYNAMIC_OLDER@ NO_AFFINITY=@NO_AFFINITY@ USE_OPENMP=@USE_OPENMP@ @CORE@ MAX_THREADS=@NUM_THREADS@
|
||||
Name: OpenBLAS
|
||||
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
|
||||
Version: @OPENBLAS_VERSION@
|
||||
URL: https://github.com/xianyi/OpenBLAS
|
||||
Libs: -L${libdir} -lopenblas${libsuffix}
|
||||
Libs: -L${libdir} -lopenblas
|
||||
Cflags: -I${includedir}
|
||||
|
||||
@@ -3,6 +3,19 @@
|
||||
## Description: Ported from portion of OpenBLAS/Makefile.system
|
||||
## Detects the OS and sets appropriate variables.
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
|
||||
set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var
|
||||
set(MD5SUM "md5 -r")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
|
||||
set(MD5SUM "md5 -r")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD")
|
||||
set(MD5SUM "md5 -n")
|
||||
endif ()
|
||||
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
set(EXTRALIB "${EXTRALIB} -lm")
|
||||
set(NO_EXPRECISION 1)
|
||||
@@ -43,7 +56,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
|
||||
# Ensure the correct stack alignment on Win32
|
||||
# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
|
||||
if (X86)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
if (NOT MSVC AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2")
|
||||
endif ()
|
||||
@@ -65,7 +78,7 @@ if (CYGWIN)
|
||||
endif ()
|
||||
|
||||
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Android")
|
||||
if (USE_THREAD)
|
||||
if (SMP)
|
||||
set(EXTRALIB "${EXTRALIB} -lpthread")
|
||||
endif ()
|
||||
endif ()
|
||||
@@ -75,7 +88,7 @@ if (QUAD_PRECISION)
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
if (X86)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(NO_EXPRECISION 1)
|
||||
endif ()
|
||||
|
||||
|
||||
@@ -37,314 +37,112 @@
|
||||
|
||||
# CPUIDEMU = ../../cpuid/table.o
|
||||
|
||||
|
||||
if (DEFINED CPUIDEMU)
|
||||
set(EXFLAGS "-DCPUIDEMU -DVENDOR=99")
|
||||
endif ()
|
||||
|
||||
if (BUILD_KERNEL)
|
||||
if (DEFINED TARGET_CORE)
|
||||
# set the C flags for just this file
|
||||
set(GETARCH2_FLAGS "-DBUILD_KERNEL")
|
||||
set(TARGET_MAKE "Makefile_kernel.conf")
|
||||
set(TARGET_CONF "config_kernel.h")
|
||||
set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR}/kernel_config/${TARGET_CORE})
|
||||
else()
|
||||
set(TARGET_MAKE "Makefile.conf")
|
||||
set(TARGET_CONF "config.h")
|
||||
set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR})
|
||||
endif ()
|
||||
|
||||
set(TARGET_CONF_TEMP "${PROJECT_BINARY_DIR}/${TARGET_CONF}.tmp")
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/c_check.cmake")
|
||||
|
||||
# c_check
|
||||
set(FU "")
|
||||
if (APPLE OR (MSVC AND NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang"))
|
||||
set(FU "_")
|
||||
endif()
|
||||
|
||||
set(COMPILER_ID ${CMAKE_C_COMPILER_ID})
|
||||
if (${COMPILER_ID} STREQUAL "GNU")
|
||||
set(COMPILER_ID "GCC")
|
||||
endif ()
|
||||
|
||||
string(TOUPPER ${ARCH} UC_ARCH)
|
||||
|
||||
file(WRITE ${TARGET_CONF_TEMP}
|
||||
"#define OS_${HOST_OS}\t1\n"
|
||||
"#define ARCH_${UC_ARCH}\t1\n"
|
||||
"#define C_${COMPILER_ID}\t1\n"
|
||||
"#define __${BINARY}BIT__\t1\n"
|
||||
"#define FUNDERSCORE\t${FU}\n")
|
||||
|
||||
if (${HOST_OS} STREQUAL "WINDOWSSTORE")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define OS_WINNT\t1\n")
|
||||
endif ()
|
||||
|
||||
# f_check
|
||||
if (NOT NOFORTRAN)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake")
|
||||
endif ()
|
||||
|
||||
# Cannot run getarch on target if we are cross-compiling
|
||||
if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSSTORE"))
|
||||
# Write to config as getarch would
|
||||
# compile getarch
|
||||
# Sources compiled into the getarch probe.
# CPUIDEMU is the optional CPUID-emulation object that this file tests with
# `if (DEFINED CPUIDEMU)`; the previous spelling `CPUIDEMO` named a variable
# that is never set, so the entry always expanded to nothing.
set(GETARCH_SRC
  ${PROJECT_SOURCE_DIR}/getarch.c
  ${CPUIDEMU}
)
|
||||
|
||||
# TODO: Set up defines that getarch sets up based on every other target
|
||||
# Perhaps this should be inside a different file as it grows larger
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define ${CORE}\n"
|
||||
"#define CHAR_CORENAME \"${CORE}\"\n")
|
||||
# Per-core configuration.
#
# For each core handled here, append that core's cache/TLB parameters and
# feature macros to ${TARGET_CONF_TEMP} as "#define" lines, and set the GEMM
# unroll factors for the precisions the core specifies. A core that matches no
# branch appends nothing and sets no unroll factors in this chain.
#
# Fix: the THUNDERX and THUNDERX2T99 conditions were missing the closing double
# quote on the core name, which left an unterminated quoted argument in the
# elseif() and broke parsing of the remainder of the chain.
if ("${CORE}" STREQUAL "ARMV7")
  # NOTE(review): 512488 is not a power of two (512 KiB would be 524288);
  # kept as-is -- confirm against the value getarch emits for this core.
  file(APPEND ${TARGET_CONF_TEMP}
    "#define L1_DATA_SIZE\t65536\n"
    "#define L1_DATA_LINESIZE\t32\n"
    "#define L2_SIZE\t512488\n"
    "#define L2_LINESIZE\t32\n"
    "#define DTB_DEFAULT_ENTRIES\t64\n"
    "#define DTB_SIZE\t4096\n"
    "#define L2_ASSOCIATIVE\t4\n"
    "#define HAVE_VFPV3\n"
    "#define HAVE_VFP\n")
  set(SGEMM_UNROLL_M 4)
  set(SGEMM_UNROLL_N 4)
  set(DGEMM_UNROLL_M 4)
  set(DGEMM_UNROLL_N 4)
elseif ("${CORE}" STREQUAL "ARMV8")
  file(APPEND ${TARGET_CONF_TEMP}
    "#define L1_DATA_SIZE\t32768\n"
    "#define L1_DATA_LINESIZE\t64\n"
    "#define L2_SIZE\t262144\n"
    "#define L2_LINESIZE\t64\n"
    "#define DTB_DEFAULT_ENTRIES\t64\n"
    "#define DTB_SIZE\t4096\n"
    "#define L2_ASSOCIATIVE\t32\n"
    "#define ARMV8\n")
  # Only the single-precision factors are set for this core.
  set(SGEMM_UNROLL_M 4)
  set(SGEMM_UNROLL_N 4)
elseif ("${CORE}" STREQUAL "CORTEXA57" OR "${CORE}" STREQUAL "CORTEXA53")
  file(APPEND ${TARGET_CONF_TEMP}
    "#define L1_CODE_SIZE\t32768\n"
    "#define L1_CODE_LINESIZE\t64\n"
    "#define L1_CODE_ASSOCIATIVE\t3\n"
    "#define L1_DATA_SIZE\t32768\n"
    "#define L1_DATA_LINESIZE\t64\n"
    "#define L1_DATA_ASSOCIATIVE\t2\n"
    "#define L2_SIZE\t262144\n"
    "#define L2_LINESIZE\t64\n"
    "#define L2_ASSOCIATIVE\t16\n"
    "#define DTB_DEFAULT_ENTRIES\t64\n"
    "#define DTB_SIZE\t4096\n"
    "#define HAVE_VFPV4\n"
    "#define HAVE_VFPV3\n"
    "#define HAVE_VFP\n"
    "#define HAVE_NEON\n"
    "#define ARMV8\n")
  set(SGEMM_UNROLL_M 16)
  set(SGEMM_UNROLL_N 4)
  set(DGEMM_UNROLL_M 8)
  set(DGEMM_UNROLL_N 4)
  set(CGEMM_UNROLL_M 8)
  set(CGEMM_UNROLL_N 4)
  set(ZGEMM_UNROLL_M 8)
  set(ZGEMM_UNROLL_N 4)
elseif ("${CORE}" STREQUAL "CORTEXA72" OR "${CORE}" STREQUAL "CORTEXA73")
  file(APPEND ${TARGET_CONF_TEMP}
    "#define L1_CODE_SIZE\t49152\n"
    "#define L1_CODE_LINESIZE\t64\n"
    "#define L1_CODE_ASSOCIATIVE\t3\n"
    "#define L1_DATA_SIZE\t32768\n"
    "#define L1_DATA_LINESIZE\t64\n"
    "#define L1_DATA_ASSOCIATIVE\t2\n"
    "#define L2_SIZE\t524288\n"
    "#define L2_LINESIZE\t64\n"
    "#define L2_ASSOCIATIVE\t16\n"
    "#define DTB_DEFAULT_ENTRIES\t64\n"
    "#define DTB_SIZE\t4096\n"
    "#define HAVE_VFPV4\n"
    "#define HAVE_VFPV3\n"
    "#define HAVE_VFP\n"
    "#define HAVE_NEON\n"
    "#define ARMV8\n")
  set(SGEMM_UNROLL_M 16)
  set(SGEMM_UNROLL_N 4)
  set(DGEMM_UNROLL_M 8)
  set(DGEMM_UNROLL_N 4)
  set(CGEMM_UNROLL_M 8)
  set(CGEMM_UNROLL_N 4)
  set(ZGEMM_UNROLL_M 8)
  set(ZGEMM_UNROLL_N 4)
elseif ("${CORE}" STREQUAL "FALKOR")
  file(APPEND ${TARGET_CONF_TEMP}
    "#define L1_CODE_SIZE\t65536\n"
    "#define L1_CODE_LINESIZE\t64\n"
    "#define L1_CODE_ASSOCIATIVE\t3\n"
    "#define L1_DATA_SIZE\t32768\n"
    "#define L1_DATA_LINESIZE\t128\n"
    "#define L1_DATA_ASSOCIATIVE\t2\n"
    "#define L2_SIZE\t524288\n"
    "#define L2_LINESIZE\t64\n"
    "#define L2_ASSOCIATIVE\t16\n"
    "#define DTB_DEFAULT_ENTRIES\t64\n"
    "#define DTB_SIZE\t4096\n"
    "#define HAVE_VFPV4\n"
    "#define HAVE_VFPV3\n"
    "#define HAVE_VFP\n"
    "#define HAVE_NEON\n"
    "#define ARMV8\n")
  set(SGEMM_UNROLL_M 16)
  set(SGEMM_UNROLL_N 4)
  set(DGEMM_UNROLL_M 8)
  set(DGEMM_UNROLL_N 4)
  set(CGEMM_UNROLL_M 8)
  set(CGEMM_UNROLL_N 4)
  set(ZGEMM_UNROLL_M 8)
  set(ZGEMM_UNROLL_N 4)
elseif ("${CORE}" STREQUAL "THUNDERX")
  # NOTE(review): 167772164 is neither 16 MiB (16777216) nor a multiple of the
  # 128-byte line size; kept as-is -- confirm the intended L2 size.
  file(APPEND ${TARGET_CONF_TEMP}
    "#define L1_CODE_SIZE\t32768\n"
    "#define L1_CODE_LINESIZE\t64\n"
    "#define L1_CODE_ASSOCIATIVE\t3\n"
    "#define L1_DATA_SIZE\t32768\n"
    "#define L1_DATA_LINESIZE\t128\n"
    "#define L1_DATA_ASSOCIATIVE\t2\n"
    "#define L2_SIZE\t167772164\n"
    "#define L2_LINESIZE\t128\n"
    "#define L2_ASSOCIATIVE\t16\n"
    "#define DTB_DEFAULT_ENTRIES\t64\n"
    "#define DTB_SIZE\t4096\n"
    "#define HAVE_VFPV4\n"
    "#define HAVE_VFPV3\n"
    "#define HAVE_VFP\n"
    "#define HAVE_NEON\n"
    "#define ARMV8\n")
  set(SGEMM_UNROLL_M 4)
  set(SGEMM_UNROLL_N 4)
  set(DGEMM_UNROLL_M 2)
  set(DGEMM_UNROLL_N 2)
  set(CGEMM_UNROLL_M 2)
  set(CGEMM_UNROLL_N 2)
  set(ZGEMM_UNROLL_M 2)
  set(ZGEMM_UNROLL_N 2)
elseif ("${CORE}" STREQUAL "THUNDERX2T99")
  # This branch also emits L3 parameters and defines VULCAN rather than ARMV8.
  file(APPEND ${TARGET_CONF_TEMP}
    "#define L1_CODE_SIZE\t32768\n"
    "#define L1_CODE_LINESIZE\t64\n"
    "#define L1_CODE_ASSOCIATIVE\t8\n"
    "#define L1_DATA_SIZE\t32768\n"
    "#define L1_DATA_LINESIZE\t64\n"
    "#define L1_DATA_ASSOCIATIVE\t8\n"
    "#define L2_SIZE\t262144\n"
    "#define L2_LINESIZE\t64\n"
    "#define L2_ASSOCIATIVE\t8\n"
    "#define L3_SIZE\t33554432\n"
    "#define L3_LINESIZE\t64\n"
    "#define L3_ASSOCIATIVE\t32\n"
    "#define DTB_DEFAULT_ENTRIES\t64\n"
    "#define DTB_SIZE\t4096\n"
    "#define VULCAN\n")
  set(SGEMM_UNROLL_M 16)
  set(SGEMM_UNROLL_N 4)
  set(DGEMM_UNROLL_M 8)
  set(DGEMM_UNROLL_N 4)
  set(CGEMM_UNROLL_M 8)
  set(CGEMM_UNROLL_N 4)
  set(ZGEMM_UNROLL_M 4)
  set(ZGEMM_UNROLL_N 4)
endif()
|
||||
if (NOT MSVC)
|
||||
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
|
||||
endif ()
|
||||
|
||||
# Or should this actually be NUM_CORES?
|
||||
if (${NUM_THREADS} GREATER 0)
|
||||
file(APPEND ${TARGET_CONF_TEMP} "#define NUM_CORES\t${NUM_THREADS}\n")
|
||||
endif()
|
||||
if (MSVC)
|
||||
#Use generic for MSVC now
|
||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
|
||||
endif()
|
||||
|
||||
# GetArch_2nd
|
||||
# Default GEMM unroll factors: for each precision prefix (S, D, Q, C, Z, X),
# set <prefix>GEMM_UNROLL_M and <prefix>GEMM_UNROLL_N to 2 unless the variable
# is already defined, so values set before this loop are left untouched.
foreach(float_char S;D;Q;C;Z;X)
|
||||
if (NOT DEFINED ${float_char}GEMM_UNROLL_M)
|
||||
set(${float_char}GEMM_UNROLL_M 2)
|
||||
endif()
|
||||
if (NOT DEFINED ${float_char}GEMM_UNROLL_N)
|
||||
set(${float_char}GEMM_UNROLL_N 2)
|
||||
endif()
|
||||
endforeach()
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define GEMM_MULTITHREAD_THRESHOLD\t${GEMM_MULTITHREAD_THRESHOLD}\n")
|
||||
# Move to where gen_config_h would place it
|
||||
file(MAKE_DIRECTORY ${TARGET_CONF_DIR})
|
||||
file(RENAME ${TARGET_CONF_TEMP} "${TARGET_CONF_DIR}/${TARGET_CONF}")
|
||||
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
# disable WindowsStore strict CRT checks
|
||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
|
||||
endif ()
|
||||
|
||||
else(NOT CMAKE_CROSSCOMPILING)
|
||||
# compile getarch
|
||||
set(GETARCH_SRC
|
||||
${PROJECT_SOURCE_DIR}/getarch.c
|
||||
${CPUIDEMU}
|
||||
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
|
||||
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH_DIR})
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GETARCH_RESULT ${GETARCH_DIR}
|
||||
SOURCES ${GETARCH_SRC}
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GETARCH_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
|
||||
)
|
||||
|
||||
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC")
|
||||
#Use generic for MSVC now
|
||||
message("MSVC")
|
||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
|
||||
else()
|
||||
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
|
||||
if (NOT ${GETARCH_RESULT})
|
||||
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
|
||||
endif ()
|
||||
endif ()
|
||||
message(STATUS "Running getarch")
|
||||
|
||||
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
# disable WindowsStore strict CRT checks
|
||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
|
||||
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
|
||||
|
||||
message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
|
||||
|
||||
# append config data from getarch to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT})
|
||||
ParseGetArchVars(${GETARCH_MAKE_OUT})
|
||||
|
||||
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
|
||||
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH2_DIR})
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
|
||||
SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GETARCH2_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
|
||||
)
|
||||
|
||||
if (NOT ${GETARCH2_RESULT})
|
||||
MESSAGE(FATAL_ERROR "Compiling getarch_2nd failed ${GETARCH2_LOG}")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
|
||||
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH_DIR})
|
||||
configure_file(${TARGET_CONF_TEMP} ${GETARCH_DIR}/${TARGET_CONF} COPYONLY)
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GETARCH_RESULT ${GETARCH_DIR}
|
||||
SOURCES ${GETARCH_SRC}
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${GETARCH_DIR} -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
|
||||
OUTPUT_VARIABLE GETARCH_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
|
||||
)
|
||||
|
||||
if (NOT ${GETARCH_RESULT})
|
||||
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
|
||||
endif ()
|
||||
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
|
||||
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)
|
||||
|
||||
# append config data from getarch_2nd to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT})
|
||||
ParseGetArchVars(${GETARCH2_MAKE_OUT})
|
||||
|
||||
# compile get_config_h
|
||||
set(GEN_CONFIG_H_DIR "${PROJECT_BINARY_DIR}/genconfig_h_build")
|
||||
set(GEN_CONFIG_H_BIN "gen_config_h${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
set(GEN_CONFIG_H_FLAGS "-DVERSION=\"${OpenBLAS_VERSION}\"")
|
||||
file(MAKE_DIRECTORY ${GEN_CONFIG_H_DIR})
|
||||
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GEN_CONFIG_H_RESULT ${GEN_CONFIG_H_DIR}
|
||||
SOURCES ${PROJECT_SOURCE_DIR}/gen_config_h.c
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GEN_CONFIG_H_FLAGS} -I${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE GEN_CONFIG_H_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GEN_CONFIG_H_BIN}
|
||||
)
|
||||
|
||||
if (NOT ${GEN_CONFIG_H_RESULT})
|
||||
MESSAGE(FATAL_ERROR "Compiling gen_config_h failed ${GEN_CONFIG_H_LOG}")
|
||||
endif ()
|
||||
message(STATUS "Running getarch")
|
||||
|
||||
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
||||
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
|
||||
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
|
||||
|
||||
message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
|
||||
|
||||
# append config data from getarch to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF_TEMP} ${GETARCH_CONF_OUT})
|
||||
ParseGetArchVars(${GETARCH_MAKE_OUT})
|
||||
|
||||
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
|
||||
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
|
||||
file(MAKE_DIRECTORY ${GETARCH2_DIR})
|
||||
configure_file(${TARGET_CONF_TEMP} ${GETARCH2_DIR}/${TARGET_CONF} COPYONLY)
|
||||
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
|
||||
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
|
||||
SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c
|
||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${GETARCH2_DIR} -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
|
||||
OUTPUT_VARIABLE GETARCH2_LOG
|
||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
|
||||
)
|
||||
|
||||
if (NOT ${GETARCH2_RESULT})
|
||||
MESSAGE(FATAL_ERROR "Compiling getarch_2nd failed ${GETARCH2_LOG}")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
||||
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
|
||||
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)
|
||||
|
||||
# append config data from getarch_2nd to the TARGET file and read in CMake vars
|
||||
file(APPEND ${TARGET_CONF_TEMP} ${GETARCH2_CONF_OUT})
|
||||
|
||||
configure_file(${TARGET_CONF_TEMP} ${TARGET_CONF_DIR}/${TARGET_CONF} COPYONLY)
|
||||
|
||||
ParseGetArchVars(${GETARCH2_MAKE_OUT})
|
||||
|
||||
endif()
|
||||
endif ()
|
||||
@@ -2,27 +2,13 @@
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from OpenBLAS/Makefile.system
|
||||
##
|
||||
|
||||
set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib")
|
||||
|
||||
# System detection, via CMake.
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/system_check.cmake")
|
||||
# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
|
||||
# http://stackoverflow.com/questions/714100/os-detecting-makefile
|
||||
|
||||
if(CMAKE_CROSSCOMPILING AND NOT DEFINED TARGET)
|
||||
# Detect target without running getarch
|
||||
if (ARM64)
|
||||
set(TARGET "ARMV8")
|
||||
elseif(ARM)
|
||||
set(TARGET "ARMV7") # TODO: Ask compiler which arch this is
|
||||
else()
|
||||
message(FATAL_ERROR "When cross compiling, a TARGET is required.")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Other files expect CORE, which is actually TARGET and will become TARGET_CORE for kernel build. Confused yet?
|
||||
# It seems we are meant to use TARGET as input and CORE internally as kernel.
|
||||
if(NOT DEFINED CORE AND DEFINED TARGET)
|
||||
set(CORE ${TARGET})
|
||||
endif()
|
||||
# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa
|
||||
|
||||
# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
|
||||
if (DEFINED TARGET_CORE)
|
||||
@@ -33,7 +19,7 @@ endif ()
|
||||
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
|
||||
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
||||
set(NO_AVX 1)
|
||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX")
|
||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE")
|
||||
set(TARGET "NEHALEM")
|
||||
endif ()
|
||||
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
|
||||
@@ -42,23 +28,7 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
|
||||
endif ()
|
||||
|
||||
if (DEFINED TARGET)
|
||||
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
endif()
|
||||
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
|
||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||
endif()
|
||||
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (DEFINED TARGET)
|
||||
message(STATUS "Targeting the ${TARGET} architecture.")
|
||||
message(STATUS "Targetting the ${TARGET} architecture.")
|
||||
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
|
||||
endif ()
|
||||
|
||||
@@ -83,16 +53,21 @@ if (NO_AVX2)
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2")
|
||||
endif ()
|
||||
|
||||
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} ${CMAKE_C_FLAGS_DEBUG}")
|
||||
if (CMAKE_BUILD_TYPE STREQUAL Debug)
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -g")
|
||||
endif ()
|
||||
|
||||
# TODO: let CMake handle this? -hpa
|
||||
#if (${QUIET_MAKE})
|
||||
# set(MAKE "${MAKE} -s")
|
||||
#endif()
|
||||
|
||||
if (NOT DEFINED NO_PARALLEL_MAKE)
|
||||
set(NO_PARALLEL_MAKE 0)
|
||||
endif ()
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}")
|
||||
|
||||
if (CMAKE_C_COMPILER STREQUAL loongcc)
|
||||
if (CMAKE_CXX_COMPILER STREQUAL loongcc)
|
||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -static")
|
||||
endif ()
|
||||
|
||||
@@ -103,44 +78,51 @@ else ()
|
||||
set(ONLY_CBLAS 0)
|
||||
endif ()
|
||||
|
||||
# N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
|
||||
if (NOT CMAKE_CROSSCOMPILING)
|
||||
if (NOT DEFINED NUM_CORES)
|
||||
include(ProcessorCount)
|
||||
ProcessorCount(NUM_CORES)
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
if (NOT DEFINED NUM_PARALLEL)
|
||||
set(NUM_PARALLEL 1)
|
||||
endif()
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
|
||||
|
||||
if (NOT DEFINED NUM_THREADS)
|
||||
if (DEFINED NUM_CORES AND NOT NUM_CORES EQUAL 0)
|
||||
# HT?
|
||||
set(NUM_THREADS ${NUM_CORES})
|
||||
else ()
|
||||
set(NUM_THREADS 0)
|
||||
endif ()
|
||||
endif()
|
||||
set(NUM_THREADS ${NUM_CORES})
|
||||
endif ()
|
||||
|
||||
if (${NUM_THREADS} LESS 2)
|
||||
if (${NUM_THREADS} EQUAL 1)
|
||||
set(USE_THREAD 0)
|
||||
elseif(NOT DEFINED USE_THREAD)
|
||||
set(USE_THREAD 1)
|
||||
endif ()
|
||||
|
||||
if (USE_THREAD)
|
||||
message(STATUS "Multi-threading enabled with ${NUM_THREADS} threads.")
|
||||
if (DEFINED USE_THREAD)
|
||||
if (NOT ${USE_THREAD})
|
||||
unset(SMP)
|
||||
else ()
|
||||
set(SMP 1)
|
||||
endif ()
|
||||
else ()
|
||||
# N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
|
||||
if (${NUM_THREADS} EQUAL 1)
|
||||
unset(SMP)
|
||||
else ()
|
||||
set(SMP 1)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
|
||||
if (${SMP})
|
||||
message(STATUS "SMP enabled.")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED NEED_PIC)
|
||||
set(NEED_PIC 1)
|
||||
endif ()
|
||||
|
||||
# TODO: I think CMake should be handling all this stuff -hpa
|
||||
unset(ARFLAGS)
|
||||
set(CPP "${COMPILER} -E")
|
||||
set(AR "${CROSS_SUFFIX}ar")
|
||||
set(AS "${CROSS_SUFFIX}as")
|
||||
set(LD "${CROSS_SUFFIX}ld")
|
||||
set(RANLIB "${CROSS_SUFFIX}ranlib")
|
||||
set(NM "${CROSS_SUFFIX}nm")
|
||||
set(DLLWRAP "${CROSS_SUFFIX}dllwrap")
|
||||
set(OBJCOPY "${CROSS_SUFFIX}objcopy")
|
||||
set(OBJCONV "${CROSS_SUFFIX}objconv")
|
||||
|
||||
# OS dependent settings
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/os.cmake")
|
||||
|
||||
@@ -168,20 +150,15 @@ if (NEED_PIC)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC")
|
||||
endif ()
|
||||
|
||||
if (NOT NOFORTRAN)
|
||||
if (${F_COMPILER} STREQUAL "SUN")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
|
||||
endif ()
|
||||
endif()
|
||||
if (${F_COMPILER} STREQUAL "SUN")
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
|
||||
else ()
|
||||
set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
|
||||
if (DYNAMIC_OLDER)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_OLDER")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NO_LAPACK)
|
||||
@@ -198,7 +175,7 @@ if (NO_AVX)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
|
||||
endif ()
|
||||
|
||||
if (X86)
|
||||
if (${ARCH} STREQUAL "x86")
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
|
||||
endif ()
|
||||
|
||||
@@ -206,20 +183,25 @@ if (NO_AVX2)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
|
||||
endif ()
|
||||
|
||||
if (USE_THREAD)
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
# NO_AFFINITY = 1
|
||||
if (SMP)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER")
|
||||
|
||||
if (MIPS64)
|
||||
if (${ARCH} STREQUAL "mips64")
|
||||
if (NOT ${CORE} STREQUAL "LOONGSON3B")
|
||||
set(USE_SIMPLE_THREADED_LEVEL3 1)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (USE_OPENMP)
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
# NO_AFFINITY = 1
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_OPENMP")
|
||||
endif ()
|
||||
|
||||
if (BIGNUMA)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DBIGNUMA")
|
||||
endif ()
|
||||
|
||||
endif ()
|
||||
|
||||
if (NO_WARMUP)
|
||||
@@ -230,10 +212,6 @@ if (CONSISTENT_FPCSR)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DCONSISTENT_FPCSR")
|
||||
endif ()
|
||||
|
||||
if (USE_TLS)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_TLS")
|
||||
endif ()
|
||||
|
||||
# Only for development
|
||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST")
|
||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST")
|
||||
@@ -251,8 +229,6 @@ endif ()
|
||||
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")
|
||||
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_PARALLEL_NUMBER=${NUM_PARALLEL}")
|
||||
|
||||
if (USE_SIMPLE_THREADED_LEVEL3)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
|
||||
endif ()
|
||||
@@ -288,7 +264,7 @@ if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
|
||||
set(NO_AFFINITY 1)
|
||||
endif ()
|
||||
|
||||
if (NOT X86_64 AND NOT X86 AND NOT ${CORE} STREQUAL "LOONGSON3B")
|
||||
if (NOT ${ARCH} STREQUAL "x86_64" AND NOT ${ARCH} STREQUAL "x86" AND NOT ${CORE} STREQUAL "LOONGSON3B")
|
||||
set(NO_AFFINITY 1)
|
||||
endif ()
|
||||
|
||||
@@ -320,21 +296,52 @@ if (MIXED_MEMORY_ALLOCATION)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
|
||||
endif ()
|
||||
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DVERSION=\"\\\"${OpenBLAS_VERSION}\\\"\"")
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "SunOS")
|
||||
set(TAR gtar)
|
||||
set(PATCH gpatch)
|
||||
set(GREP ggrep)
|
||||
else ()
|
||||
set(TAR tar)
|
||||
set(PATCH patch)
|
||||
set(GREP grep)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED MD5SUM)
|
||||
set(MD5SUM md5sum)
|
||||
endif ()
|
||||
|
||||
set(AWK awk)
|
||||
|
||||
set(SED sed)
|
||||
|
||||
set(REVISION "-r${OpenBLAS_VERSION}")
|
||||
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CCOMMON_OPT}")
|
||||
if (DEBUG)
|
||||
set(COMMON_OPT "${COMMON_OPT} -g")
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED COMMON_OPT)
|
||||
set(COMMON_OPT "-O2")
|
||||
endif ()
|
||||
|
||||
#For x86 32-bit
|
||||
if (DEFINED BINARY AND BINARY EQUAL 32)
|
||||
if (NOT MSVC)
|
||||
set(COMMON_OPT "${COMMON_OPT} -m32")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
|
||||
if(NOT MSVC)
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${CCOMMON_OPT}")
|
||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
|
||||
endif()
|
||||
# TODO: not sure what PFLAGS is -hpa
|
||||
set(PFLAGS "${PFLAGS} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")
|
||||
set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")
|
||||
|
||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${FCOMMON_OPT}")
|
||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}")
|
||||
# TODO: not sure what FPFLAGS is -hpa
|
||||
set(FPFLAGS "${FPFLAGS} ${FCOMMON_OPT} ${COMMON_PROF}")
|
||||
set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}")
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}")
|
||||
@@ -342,7 +349,7 @@ set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}")
|
||||
|
||||
#Disable -fopenmp for LAPACK Fortran codes on Windows.
|
||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel")
|
||||
set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parralel")
|
||||
foreach (FILTER_FLAG ${FILTER_FLAGS})
|
||||
string(REPLACE ${FILTER_FLAG} "" LAPACK_FFLAGS ${LAPACK_FFLAGS})
|
||||
string(REPLACE ${FILTER_FLAG} "" LAPACK_FPFLAGS ${LAPACK_FPFLAGS})
|
||||
@@ -380,7 +387,7 @@ if (NOT DEFINED LIBSUFFIX)
|
||||
endif ()
|
||||
|
||||
if (DYNAMIC_ARCH)
|
||||
if (USE_THREAD)
|
||||
if (DEFINED SMP)
|
||||
set(LIBNAME "${LIBPREFIX}p${REVISION}.${LIBSUFFIX}")
|
||||
set(LIBNAME_P "${LIBPREFIX}p${REVISION}_p.${LIBSUFFIX}")
|
||||
else ()
|
||||
@@ -388,7 +395,7 @@ if (DYNAMIC_ARCH)
|
||||
set(LIBNAME_P "${LIBPREFIX}${REVISION}_p.${LIBSUFFIX}")
|
||||
endif ()
|
||||
else ()
|
||||
if (USE_THREAD)
|
||||
if (DEFINED SMP)
|
||||
set(LIBNAME "${LIBPREFIX}_${LIBCORE}p${REVISION}.${LIBSUFFIX}")
|
||||
set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}p${REVISION}_p.${LIBSUFFIX}")
|
||||
else ()
|
||||
@@ -419,9 +426,6 @@ if (NOT NO_LAPACK)
|
||||
if (NOT NO_LAPACKE)
|
||||
set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE")
|
||||
endif ()
|
||||
if (BUILD_RELAPACK)
|
||||
set(LIB_COMPONENTS "${LIB_COMPONENTS} ReLAPACK")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (ONLY_CBLAS)
|
||||
@@ -433,7 +437,7 @@ endif ()
|
||||
set(USE_GEMM3M 0)
|
||||
|
||||
if (DEFINED ARCH)
|
||||
if (X86 OR X86_64 OR ${ARCH} STREQUAL "ia64" OR MIPS64)
|
||||
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
|
||||
set(USE_GEMM3M 1)
|
||||
endif ()
|
||||
|
||||
@@ -516,3 +520,35 @@ endif ()
|
||||
# export CUFLAGS
|
||||
# export CULIB
|
||||
#endif
|
||||
|
||||
#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f
|
||||
#
|
||||
#.f.$(SUFFIX):
|
||||
# $(FC) $(FFLAGS) -c $< -o $(@F)
|
||||
#
|
||||
#.f.$(PSUFFIX):
|
||||
# $(FC) $(FPFLAGS) -pg -c $< -o $(@F)
|
||||
|
||||
# these are not cross-platform
|
||||
#ifdef BINARY64
|
||||
#PATHSCALEPATH = /opt/pathscale/lib/3.1
|
||||
#PGIPATH = /opt/pgi/linux86-64/7.1-5/lib
|
||||
#else
|
||||
#PATHSCALEPATH = /opt/pathscale/lib/3.1/32
|
||||
#PGIPATH = /opt/pgi/linux86/7.1-5/lib
|
||||
#endif
|
||||
|
||||
#ACMLPATH = /opt/acml/4.3.0
|
||||
#ifneq ($(OSNAME), Darwin)
|
||||
#MKLPATH = /opt/intel/mkl/10.2.2.025/lib
|
||||
#else
|
||||
#MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib
|
||||
#endif
|
||||
#ATLASPATH = /opt/atlas/3.9.17/opteron
|
||||
#FLAMEPATH = $(HOME)/flame/lib
|
||||
#ifneq ($(OSNAME), SunOS)
|
||||
#SUNPATH = /opt/sunstudio12.1
|
||||
#else
|
||||
#SUNPATH = /opt/SUNWspro
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,87 +0,0 @@
|
||||
##
|
||||
## Author: Hank Anderson <hank@statease.com>
|
||||
## Description: Ported from the OpenBLAS/c_check perl script.
|
||||
## This is triggered by prebuild.cmake and runs before any of the code is built.
|
||||
## Creates config.h and Makefile.conf.
|
||||
|
||||
# Convert CMake vars into the format that OpenBLAS expects
|
||||
string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS)
|
||||
if (${HOST_OS} STREQUAL "WINDOWS")
|
||||
set(HOST_OS WINNT)
|
||||
endif ()
|
||||
|
||||
if (${HOST_OS} STREQUAL "LINUX")
|
||||
# check if we're building natively on Android (TERMUX)
|
||||
EXECUTE_PROCESS( COMMAND uname -o COMMAND tr -d '\n' OUTPUT_VARIABLE OPERATING_SYSTEM)
|
||||
if(${OPERATING_SYSTEM} MATCHES "Android")
|
||||
set(HOST_OS ANDROID)
|
||||
endif(${OPERATING_SYSTEM} MATCHES "Android")
|
||||
endif()
|
||||
|
||||
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCC AND WIN32)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpmachine
|
||||
OUTPUT_VARIABLE OPENBLAS_GCC_TARGET_MACHINE
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
if(OPENBLAS_GCC_TARGET_MACHINE MATCHES "amd64|x86_64|AMD64")
|
||||
set(MINGW64 1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Pretty thorough determination of arch. Add more if needed
|
||||
if(CMAKE_CL_64 OR MINGW64)
|
||||
set(X86_64 1)
|
||||
elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
|
||||
set(X86 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc.*|power.*|Power.*")
|
||||
set(PPC 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "mips64.*")
|
||||
set(MIPS64 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
|
||||
set(X86_64 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
|
||||
set(X86 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
|
||||
set(ARM 1)
|
||||
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
|
||||
set(ARM64 1)
|
||||
endif()
|
||||
|
||||
if (X86_64)
|
||||
set(ARCH "x86_64")
|
||||
elseif(X86)
|
||||
set(ARCH "x86")
|
||||
elseif(PPC)
|
||||
set(ARCH "power")
|
||||
elseif(ARM)
|
||||
set(ARCH "arm")
|
||||
elseif(ARM64)
|
||||
set(ARCH "arm64")
|
||||
else()
|
||||
set(ARCH ${CMAKE_SYSTEM_PROCESSOR} CACHE STRING "Target Architecture")
|
||||
endif ()
|
||||
|
||||
if (NOT BINARY)
|
||||
if (X86_64 OR ARM64 OR PPC OR MIPS64)
|
||||
set(BINARY 64)
|
||||
else ()
|
||||
set(BINARY 32)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
if(BINARY EQUAL 64)
|
||||
set(BINARY64 1)
|
||||
else()
|
||||
set(BINARY32 1)
|
||||
endif()
|
||||
|
||||
if (X86_64 OR X86)
|
||||
file(WRITE ${PROJECT_BINARY_DIR}/avx512.tmp "#include <immintrin.h>\n\nint main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }")
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -march=skylake-avx512 -v -o ${PROJECT_BINARY_DIR}/avx512.o -x c ${PROJECT_BINARY_DIR}/avx512.tmp OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_AVX512)
|
||||
if (NO_AVX512 EQUAL 1)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512")
|
||||
endif()
|
||||
file(REMOVE "avx512.tmp" "avx512.o")
|
||||
endif()
|
||||
|
||||
@@ -202,8 +202,6 @@ function(GenerateNamedObjects sources_in)
|
||||
if (use_cblas)
|
||||
set(obj_name "cblas_${obj_name}")
|
||||
list(APPEND obj_defines "CBLAS")
|
||||
elseif (NOT "${obj_name}" MATCHES "${ARCH_SUFFIX}")
|
||||
set(obj_name "${obj_name}${ARCH_SUFFIX}")
|
||||
endif ()
|
||||
|
||||
list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")
|
||||
@@ -236,9 +234,7 @@ function(GenerateNamedObjects sources_in)
|
||||
|
||||
string(REPLACE ";" "\n#define " define_source "${obj_defines}")
|
||||
string(REPLACE "=" " " define_source "${define_source}")
|
||||
file(WRITE ${new_source_file}.tmp "#define ${define_source}\n#include \"${old_source_file}\"")
|
||||
configure_file(${new_source_file}.tmp ${new_source_file} COPYONLY)
|
||||
file(REMOVE ${new_source_file}.tmp)
|
||||
file(WRITE ${new_source_file} "#define ${define_source}\n#include \"${old_source_file}\"")
|
||||
list(APPEND SRC_LIST_OUT ${new_source_file})
|
||||
|
||||
endforeach ()
|
||||
|
||||
82
common.h
82
common.h
@@ -93,7 +93,7 @@ extern "C" {
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_ANDROID)
|
||||
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_ANDROID)
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
@@ -105,10 +105,6 @@ extern "C" {
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef OS_HAIKU
|
||||
#define NO_SYSV_IPC
|
||||
#endif
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
#ifdef ATOM
|
||||
#define GOTO_ATOM ATOM
|
||||
@@ -183,7 +179,7 @@ extern "C" {
|
||||
|
||||
#define ALLOCA_ALIGN 63UL
|
||||
|
||||
#define NUM_BUFFERS MAX(50,(MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER))
|
||||
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
|
||||
|
||||
#ifdef NEEDBUNDERSCORE
|
||||
#define BLASFUNC(FUNC) FUNC##_
|
||||
@@ -257,14 +253,8 @@ typedef unsigned long BLASULONG;
|
||||
|
||||
#ifdef USE64BITINT
|
||||
typedef BLASLONG blasint;
|
||||
#if defined(OS_WINDOWS) && defined(__64BIT__)
|
||||
#define blasabs(x) llabs(x)
|
||||
#else
|
||||
#define blasabs(x) labs(x)
|
||||
#endif
|
||||
#else
|
||||
typedef int blasint;
|
||||
#define blasabs(x) abs(x)
|
||||
#endif
|
||||
#else
|
||||
#ifdef USE64BITINT
|
||||
@@ -505,33 +495,6 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
|
||||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
||||
extension since version 3.0. If neither are available, use a compatible
|
||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER)
|
||||
#define OPENBLAS_COMPLEX_C99
|
||||
#ifndef __cplusplus
|
||||
#include <complex.h>
|
||||
#endif
|
||||
typedef float _Complex openblas_complex_float;
|
||||
typedef double _Complex openblas_complex_double;
|
||||
typedef xdouble _Complex openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#else
|
||||
#define OPENBLAS_COMPLEX_STRUCT
|
||||
typedef struct { float real, imag; } openblas_complex_float;
|
||||
typedef struct { double real, imag; } openblas_complex_double;
|
||||
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_double(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "param.h"
|
||||
#include "common_param.h"
|
||||
|
||||
@@ -561,6 +524,31 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
#include <stdio.h>
|
||||
#endif // NOINCLUDE
|
||||
|
||||
/* C99 supports complex floating numbers natively, which GCC also offers as an
|
||||
extension since version 3.0. If neither are available, use a compatible
|
||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
|
||||
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
|
||||
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT)))
|
||||
#define OPENBLAS_COMPLEX_C99
|
||||
#ifndef __cplusplus
|
||||
#include <complex.h>
|
||||
#endif
|
||||
typedef float _Complex openblas_complex_float;
|
||||
typedef double _Complex openblas_complex_double;
|
||||
typedef xdouble _Complex openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
|
||||
#else
|
||||
#define OPENBLAS_COMPLEX_STRUCT
|
||||
typedef struct { float real, imag; } openblas_complex_float;
|
||||
typedef struct { double real, imag; } openblas_complex_double;
|
||||
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
|
||||
#define openblas_make_complex_float(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_double(real, imag) {(real), (imag)}
|
||||
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
|
||||
#endif
|
||||
|
||||
#ifdef XDOUBLE
|
||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
|
||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
|
||||
@@ -652,7 +640,6 @@ void gotoblas_profile_init(void);
|
||||
void gotoblas_profile_quit(void);
|
||||
|
||||
#ifdef USE_OPENMP
|
||||
|
||||
#ifndef C_MSVC
|
||||
int omp_in_parallel(void);
|
||||
int omp_get_num_procs(void);
|
||||
@@ -660,21 +647,6 @@ int omp_get_num_procs(void);
|
||||
__declspec(dllimport) int __cdecl omp_in_parallel(void);
|
||||
__declspec(dllimport) int __cdecl omp_get_num_procs(void);
|
||||
#endif
|
||||
|
||||
#if (__STDC_VERSION__ >= 201112L)
|
||||
#if defined(C_GCC) && ( __GNUC__ < 7)
|
||||
// workaround for GCC bug 65467
|
||||
#ifndef _Atomic
|
||||
#define _Atomic volatile
|
||||
#endif
|
||||
#endif
|
||||
#include <stdatomic.h>
|
||||
#else
|
||||
#ifndef _Atomic
|
||||
#define _Atomic volatile
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#else
|
||||
#ifdef __ELF__
|
||||
int omp_in_parallel (void) __attribute__ ((weak));
|
||||
|
||||
@@ -47,14 +47,6 @@ __global__ void cuda_dgemm_kernel(int, int, int, double *, double *, double *);
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern void sgemm_kernel_direct(BLASLONG M, BLASLONG N, BLASLONG K,
|
||||
float * A, BLASLONG strideA,
|
||||
float * B, BLASLONG strideB,
|
||||
float * R, BLASLONG strideR);
|
||||
|
||||
extern int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
|
||||
|
||||
|
||||
int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int dgemm_beta(BLASLONG, BLASLONG, BLASLONG, double,
|
||||
|
||||
@@ -94,7 +94,7 @@ static inline unsigned int rpcc(void){
|
||||
#define RPCC_DEFINED
|
||||
|
||||
#ifndef NO_AFFINITY
|
||||
//#define WHEREAMI
|
||||
#define WHEREAMI
|
||||
static inline int WhereAmI(void){
|
||||
int ret=0;
|
||||
__asm__ __volatile__(".set push \n"
|
||||
|
||||
@@ -333,8 +333,8 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
||||
float (*cnrm2_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*casum_k) (BLASLONG, float *, BLASLONG);
|
||||
int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
float _Complex (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
float _Complex (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
|
||||
|
||||
int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
@@ -496,8 +496,8 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
|
||||
double (*znrm2_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*zasum_k) (BLASLONG, double *, BLASLONG);
|
||||
int (*zcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
openblas_complex_double (*zdotu_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
openblas_complex_double (*zdotc_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
double _Complex (*zdotu_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
double _Complex (*zdotc_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int (*zdrot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
|
||||
|
||||
int (*zaxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
@@ -661,8 +661,8 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble (*xnrm2_k) (BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble (*xasum_k) (BLASLONG, xdouble *, BLASLONG);
|
||||
int (*xcopy_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
openblas_complex_xdouble (*xdotu_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
openblas_complex_xdouble (*xdotc_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble _Complex (*xdotu_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
xdouble _Complex (*xdotc_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
int (*xqrot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);
|
||||
|
||||
int (*xaxpy_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
@@ -888,7 +888,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
|
||||
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
|
||||
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
|
||||
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
int (*zgeadd_k) (BLASLONG, BLASLONG, float, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
} gotoblas_t;
|
||||
|
||||
|
||||
@@ -47,15 +47,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
* - large enough to support all architectures and kernel
|
||||
* Chosing a too small SIZE will lead to a stack smashing.
|
||||
*/
|
||||
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
|
||||
/* make it volatile because some function (ex: dgemv_n.S) */ \
|
||||
/* do not restore all register */ \
|
||||
volatile int stack_alloc_size = SIZE; \
|
||||
if (stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE)) stack_alloc_size = 0; \
|
||||
STACK_ALLOC_PROTECT_SET \
|
||||
/* Avoid declaring an array of length 0 */ \
|
||||
TYPE stack_buffer[stack_alloc_size ? stack_alloc_size : 1] \
|
||||
__attribute__((aligned(0x20))); \
|
||||
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
|
||||
/* make it volatile because some function (ex: dgemv_n.S) */ \
|
||||
/* do not restore all register */ \
|
||||
volatile int stack_alloc_size = SIZE; \
|
||||
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE)) \
|
||||
stack_alloc_size = 0; \
|
||||
STACK_ALLOC_PROTECT_SET \
|
||||
TYPE stack_buffer[stack_alloc_size] __attribute__((aligned(0x20))); \
|
||||
BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);
|
||||
#else
|
||||
//Original OpenBLAS/GotoBLAS codes.
|
||||
|
||||
10
common_x86.h
10
common_x86.h
@@ -178,13 +178,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
result = x/y;
|
||||
return result;
|
||||
#else
|
||||
#if (MAX_CPU_NUMBER > 64)
|
||||
if ( y > 64) {
|
||||
result = x/y;
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
y = blas_quick_divide_table[y];
|
||||
|
||||
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
|
||||
@@ -333,7 +327,7 @@ REALNAME:
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(__ELF__)
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__)
|
||||
#define PROLOGUE \
|
||||
.text; \
|
||||
.align 16; \
|
||||
|
||||
@@ -60,13 +60,8 @@
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define MB do { __asm__ __volatile__("": : :"memory"); } while (0)
|
||||
#define WMB do { __asm__ __volatile__("": : :"memory"); } while (0)
|
||||
#else
|
||||
#define MB do {} while (0)
|
||||
#define WMB do {} while (0)
|
||||
#endif
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
@@ -201,13 +196,6 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
|
||||
if (y <= 1) return x;
|
||||
|
||||
#if (MAX_CPU_NUMBER > 64)
|
||||
if (y > 64) {
|
||||
result = x / y;
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
y = blas_quick_divide_table[y];
|
||||
|
||||
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
|
||||
@@ -415,7 +403,7 @@ REALNAME:
|
||||
#define EPILOGUE .end
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(__ELF__) || defined(C_PGI)
|
||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI)
|
||||
#define PROLOGUE \
|
||||
.text; \
|
||||
.align 512; \
|
||||
|
||||
3
cpuid.h
3
cpuid.h
@@ -115,7 +115,6 @@
|
||||
#define CORE_STEAMROLLER 25
|
||||
#define CORE_EXCAVATOR 26
|
||||
#define CORE_ZEN 27
|
||||
#define CORE_SKYLAKEX 28
|
||||
|
||||
#define HAVE_SSE (1 << 0)
|
||||
#define HAVE_SSE2 (1 << 1)
|
||||
@@ -138,7 +137,6 @@
|
||||
#define HAVE_AVX (1 << 18)
|
||||
#define HAVE_FMA4 (1 << 19)
|
||||
#define HAVE_FMA3 (1 << 20)
|
||||
#define HAVE_AVX512VL (1 << 21)
|
||||
|
||||
#define CACHE_INFO_L1_I 1
|
||||
#define CACHE_INFO_L1_D 2
|
||||
@@ -213,6 +211,5 @@ typedef struct {
|
||||
#define CPUTYPE_STEAMROLLER 49
|
||||
#define CPUTYPE_EXCAVATOR 50
|
||||
#define CPUTYPE_ZEN 51
|
||||
#define CPUTYPE_SKYLAKEX 52
|
||||
|
||||
#endif
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
#define CPU_CORTEXA15 4
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
"UNKOWN",
|
||||
"ARMV6",
|
||||
"ARMV7",
|
||||
"CORTEXA9",
|
||||
|
||||
130
cpuid_arm64.c
130
cpuid_arm64.c
@@ -29,37 +29,25 @@
|
||||
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_ARMV8 1
|
||||
// Arm
|
||||
#define CPU_CORTEXA53 2
|
||||
#define CPU_CORTEXA57 3
|
||||
#define CPU_CORTEXA72 4
|
||||
#define CPU_CORTEXA73 5
|
||||
// Qualcomm
|
||||
#define CPU_FALKOR 6
|
||||
// Cavium
|
||||
#define CPU_THUNDERX 7
|
||||
#define CPU_THUNDERX2T99 8
|
||||
#define CPU_CORTEXA57 2
|
||||
#define CPU_VULCAN 3
|
||||
#define CPU_THUNDERX 4
|
||||
#define CPU_THUNDERX2T99 5
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
"ARMV8" ,
|
||||
"CORTEXA53",
|
||||
"CORTEXA57",
|
||||
"CORTEXA72",
|
||||
"CORTEXA73",
|
||||
"FALKOR",
|
||||
"VULCAN",
|
||||
"THUNDERX",
|
||||
"THUNDERX2T99"
|
||||
};
|
||||
|
||||
static char *cpuname_lower[] = {
|
||||
"unknown",
|
||||
"armv8",
|
||||
"cortexa53",
|
||||
"armv8" ,
|
||||
"cortexa57",
|
||||
"cortexa72",
|
||||
"cortexa73",
|
||||
"falkor",
|
||||
"vulcan",
|
||||
"thunderx",
|
||||
"thunderx2t99"
|
||||
};
|
||||
@@ -126,24 +114,13 @@ int detect(void)
|
||||
|
||||
fclose(infile);
|
||||
if(cpu_part != NULL && cpu_implementer != NULL) {
|
||||
// Arm
|
||||
if (strstr(cpu_implementer, "0x41")) {
|
||||
if (strstr(cpu_part, "0xd03"))
|
||||
return CPU_CORTEXA53;
|
||||
else if (strstr(cpu_part, "0xd07"))
|
||||
return CPU_CORTEXA57;
|
||||
else if (strstr(cpu_part, "0xd08"))
|
||||
return CPU_CORTEXA72;
|
||||
else if (strstr(cpu_part, "0xd09"))
|
||||
return CPU_CORTEXA73;
|
||||
}
|
||||
// Qualcomm
|
||||
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
|
||||
return CPU_FALKOR;
|
||||
// Cavium
|
||||
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0a1"))
|
||||
if (strstr(cpu_part, "0xd07") && strstr(cpu_implementer, "0x41"))
|
||||
return CPU_CORTEXA57;
|
||||
else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42"))
|
||||
return CPU_VULCAN;
|
||||
else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43"))
|
||||
return CPU_THUNDERX;
|
||||
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af"))
|
||||
else if (strstr(cpu_part, "0xFFF") && strstr(cpu_implementer, "0x43")) /* TODO */
|
||||
return CPU_THUNDERX2T99;
|
||||
}
|
||||
|
||||
@@ -165,7 +142,7 @@ int detect(void)
|
||||
if(p != NULL)
|
||||
{
|
||||
|
||||
if ((strstr(p, "AArch64")) || (strstr(p, "8")))
|
||||
if (strstr(p, "AArch64"))
|
||||
{
|
||||
return CPU_ARMV8;
|
||||
|
||||
@@ -202,63 +179,64 @@ void get_subdirname(void)
|
||||
void get_cpuconfig(void)
|
||||
{
|
||||
|
||||
// All arches should define ARMv8
|
||||
printf("#define ARMV8\n");
|
||||
printf("#define HAVE_NEON\n"); // This shouldn't be necessary
|
||||
printf("#define HAVE_VFPV4\n"); // This shouldn't be necessary
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_CORTEXA53:
|
||||
printf("#define %s\n", cpuname[d]);
|
||||
// Fall-through
|
||||
case CPU_ARMV8:
|
||||
// Minimum parameters for ARMv8 (based on A53)
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 262144\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
printf("#define ARMV8\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 262144\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
case CPU_VULCAN:
|
||||
printf("#define VULCAN \n");
|
||||
printf("#define HAVE_VFP \n");
|
||||
printf("#define HAVE_VFPV3 \n");
|
||||
printf("#define HAVE_NEON \n");
|
||||
printf("#define HAVE_VFPV4 \n");
|
||||
printf("#define L1_CODE_SIZE 32768 \n");
|
||||
printf("#define L1_CODE_LINESIZE 64 \n");
|
||||
printf("#define L1_CODE_ASSOCIATIVE 8 \n");
|
||||
printf("#define L1_DATA_SIZE 32768 \n");
|
||||
printf("#define L1_DATA_LINESIZE 64 \n");
|
||||
printf("#define L1_DATA_ASSOCIATIVE 8 \n");
|
||||
printf("#define L2_SIZE 262144 \n");
|
||||
printf("#define L2_LINESIZE 64 \n");
|
||||
printf("#define L2_ASSOCIATIVE 8 \n");
|
||||
printf("#define L3_SIZE 33554432 \n");
|
||||
printf("#define L3_LINESIZE 64 \n");
|
||||
printf("#define L3_ASSOCIATIVE 32 \n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
|
||||
printf("#define DTB_SIZE 4096 \n");
|
||||
break;
|
||||
|
||||
case CPU_CORTEXA57:
|
||||
case CPU_CORTEXA72:
|
||||
case CPU_CORTEXA73:
|
||||
// Common minimum settings for these Arm cores
|
||||
// Can change a lot, but we need to be conservative
|
||||
// TODO: detect info from /sys if possible
|
||||
printf("#define %s\n", cpuname[d]);
|
||||
printf("#define CORTEXA57\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define HAVE_VFPV3\n");
|
||||
printf("#define HAVE_NEON\n");
|
||||
printf("#define HAVE_VFPV4\n");
|
||||
printf("#define L1_CODE_SIZE 49152\n");
|
||||
printf("#define L1_CODE_LINESIZE 64\n");
|
||||
printf("#define L1_CODE_ASSOCIATIVE 3\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L1_DATA_ASSOCIATIVE 2\n");
|
||||
printf("#define L2_SIZE 524288\n");
|
||||
printf("#define L2_SIZE 2097152\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define L2_ASSOCIATIVE 16\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
break;
|
||||
|
||||
case CPU_FALKOR:
|
||||
printf("#define FALKOR\n");
|
||||
printf("#define L1_CODE_SIZE 65536\n");
|
||||
printf("#define L1_CODE_LINESIZE 64\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 128\n");
|
||||
printf("#define L2_SIZE 524288\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 16\n");
|
||||
break;
|
||||
|
||||
case CPU_THUNDERX:
|
||||
printf("#define ARMV8\n");
|
||||
printf("#define THUNDERX\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 128\n");
|
||||
@@ -270,7 +248,11 @@ void get_cpuconfig(void)
|
||||
break;
|
||||
|
||||
case CPU_THUNDERX2T99:
|
||||
printf("#define THUNDERX2T99 \n");
|
||||
printf("#define VULCAN \n");
|
||||
printf("#define HAVE_VFP \n");
|
||||
printf("#define HAVE_VFPV3 \n");
|
||||
printf("#define HAVE_NEON \n");
|
||||
printf("#define HAVE_VFPV4 \n");
|
||||
printf("#define L1_CODE_SIZE 32768 \n");
|
||||
printf("#define L1_CODE_LINESIZE 64 \n");
|
||||
printf("#define L1_CODE_ASSOCIATIVE 8 \n");
|
||||
|
||||
60
cpuid_mips.c
60
cpuid_mips.c
@@ -72,12 +72,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_P5600 1
|
||||
#define CPU_1004K 2
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
"P5600",
|
||||
"1004K"
|
||||
"UNKOWN",
|
||||
"P5600"
|
||||
};
|
||||
|
||||
int detect(void){
|
||||
@@ -92,7 +90,7 @@ int detect(void){
|
||||
if (!strncmp("cpu", buffer, 3)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
#if 0
|
||||
fprintf(stderr, "%s \n", p);
|
||||
fprintf(stderr, "%s\n", p);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
@@ -101,13 +99,43 @@ int detect(void){
|
||||
fclose(infile);
|
||||
|
||||
if(p != NULL){
|
||||
if (strstr(p, "5600")) {
|
||||
return CPU_P5600;
|
||||
} else if (strstr(p, "1004K")) {
|
||||
return CPU_1004K;
|
||||
} else
|
||||
if (strstr(p, "Loongson-3A")){
|
||||
return CPU_LOONGSON3A;
|
||||
}else if(strstr(p, "Loongson-3B")){
|
||||
return CPU_LOONGSON3B;
|
||||
}else if (strstr(p, "Loongson-3")){
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
p = (char *)NULL;
|
||||
while (fgets(buffer, sizeof(buffer), infile)){
|
||||
if (!strncmp("system type", buffer, 11)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(infile);
|
||||
if (strstr(p, "loongson3a"))
|
||||
return CPU_LOONGSON3A;
|
||||
}else{
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
}
|
||||
//Check model name for Loongson3
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
p = (char *)NULL;
|
||||
while (fgets(buffer, sizeof(buffer), infile)){
|
||||
if (!strncmp("model name", buffer, 10)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(infile);
|
||||
if(p != NULL){
|
||||
if (strstr(p, "Loongson-3A")){
|
||||
return CPU_LOONGSON3A;
|
||||
}else if(strstr(p, "Loongson-3B")){
|
||||
return CPU_LOONGSON3B;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
@@ -121,7 +149,7 @@ void get_architecture(void){
|
||||
}
|
||||
|
||||
void get_subarchitecture(void){
|
||||
if(detect()==CPU_P5600|| detect()==CPU_1004K){
|
||||
if(detect()==CPU_P5600){
|
||||
printf("P5600");
|
||||
}else{
|
||||
printf("UNKNOWN");
|
||||
@@ -142,14 +170,6 @@ void get_cpuconfig(void){
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
} else if (detect()==CPU_1004K) {
|
||||
printf("#define MIPS1004K\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 26144\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 8\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
}else{
|
||||
printf("#define UNKNOWN\n");
|
||||
}
|
||||
@@ -158,8 +178,6 @@ void get_cpuconfig(void){
|
||||
void get_libname(void){
|
||||
if(detect()==CPU_P5600) {
|
||||
printf("p5600\n");
|
||||
} else if (detect()==CPU_1004K) {
|
||||
printf("1004K\n");
|
||||
}else{
|
||||
printf("mips\n");
|
||||
}
|
||||
|
||||
@@ -76,16 +76,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define CPU_LOONGSON3B 3
|
||||
#define CPU_I6400 4
|
||||
#define CPU_P6600 5
|
||||
#define CPU_I6500 6
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
"UNKOWN",
|
||||
"SICORTEX",
|
||||
"LOONGSON3A",
|
||||
"LOONGSON3B",
|
||||
"I6400",
|
||||
"P6600",
|
||||
"I6500"
|
||||
"P6600"
|
||||
};
|
||||
|
||||
int detect(void){
|
||||
@@ -167,8 +165,6 @@ void get_subarchitecture(void){
|
||||
printf("I6400");
|
||||
}else if(detect()==CPU_P6600){
|
||||
printf("P6600");
|
||||
}else if(detect()==CPU_I6500){
|
||||
printf("I6500");
|
||||
}else{
|
||||
printf("SICORTEX");
|
||||
}
|
||||
@@ -215,15 +211,6 @@ void get_cpuconfig(void){
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
}else if(detect()==CPU_I6500){
|
||||
printf("#define I6500\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
}else{
|
||||
printf("#define SICORTEX\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
@@ -245,8 +232,6 @@ void get_libname(void){
|
||||
printf("i6400\n");
|
||||
}else if(detect()==CPU_P6600) {
|
||||
printf("p6600\n");
|
||||
}else if(detect()==CPU_I6500) {
|
||||
printf("i6500\n");
|
||||
}else{
|
||||
printf("mips64\n");
|
||||
}
|
||||
|
||||
@@ -56,7 +56,6 @@
|
||||
#define CPUTYPE_CELL 6
|
||||
#define CPUTYPE_PPCG4 7
|
||||
#define CPUTYPE_POWER8 8
|
||||
#define CPUTYPE_POWER9 9
|
||||
|
||||
char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
@@ -67,8 +66,7 @@ char *cpuname[] = {
|
||||
"POWER6",
|
||||
"CELL",
|
||||
"PPCG4",
|
||||
"POWER8",
|
||||
"POWER9"
|
||||
"POWER8"
|
||||
};
|
||||
|
||||
char *lowercpuname[] = {
|
||||
@@ -80,8 +78,7 @@ char *lowercpuname[] = {
|
||||
"power6",
|
||||
"cell",
|
||||
"ppcg4",
|
||||
"power8",
|
||||
"power9"
|
||||
"power8"
|
||||
};
|
||||
|
||||
char *corename[] = {
|
||||
@@ -93,8 +90,7 @@ char *corename[] = {
|
||||
"POWER6",
|
||||
"CELL",
|
||||
"PPCG4",
|
||||
"POWER8",
|
||||
"POWER8"
|
||||
"POWER8"
|
||||
};
|
||||
|
||||
int detect(void){
|
||||
@@ -124,7 +120,6 @@ int detect(void){
|
||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
|
||||
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER8;
|
||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||
|
||||
@@ -132,33 +127,6 @@ int detect(void){
|
||||
#endif
|
||||
|
||||
#ifdef _AIX
|
||||
FILE *infile;
|
||||
char buffer[512], *p;
|
||||
|
||||
p = (char *)NULL;
|
||||
infile = popen("prtconf|grep 'Processor Type'", "r");
|
||||
while (fgets(buffer, sizeof(buffer), infile)){
|
||||
if (!strncmp("Pro", buffer, 3)){
|
||||
p = strchr(buffer, ':') + 2;
|
||||
#if 0
|
||||
fprintf(stderr, "%s\n", p);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
pclose(infile);
|
||||
|
||||
if (!strncasecmp(p, "POWER3", 6)) return CPUTYPE_POWER3;
|
||||
if (!strncasecmp(p, "POWER4", 6)) return CPUTYPE_POWER4;
|
||||
if (!strncasecmp(p, "PPC970", 6)) return CPUTYPE_PPC970;
|
||||
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
|
||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
|
||||
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER8;
|
||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||
return CPUTYPE_POWER5;
|
||||
#endif
|
||||
|
||||
@@ -174,52 +142,6 @@ int detect(void){
|
||||
|
||||
return CPUTYPE_PPC970;
|
||||
#endif
|
||||
|
||||
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
|
||||
int id;
|
||||
__asm __volatile("mfpvr %0" : "=r"(id));
|
||||
switch ( id >> 16 ) {
|
||||
case 0x4e: // POWER9
|
||||
return CPUTYPE_POWER8;
|
||||
break;
|
||||
case 0x4d:
|
||||
case 0x4b: // POWER8/8E
|
||||
return CPUTYPE_POWER8;
|
||||
break;
|
||||
case 0x4a:
|
||||
case 0x3f: // POWER7/7E
|
||||
return CPUTYPE_POWER6;
|
||||
break;
|
||||
case 0x3e:
|
||||
return CPUTYPE_POWER6;
|
||||
break;
|
||||
case 0x3a:
|
||||
return CPUTYPE_POWER5;
|
||||
break;
|
||||
case 0x35:
|
||||
case 0x38: // POWER4 /4+
|
||||
return CPUTYPE_POWER4;
|
||||
break;
|
||||
case 0x40:
|
||||
case 0x41: // POWER3 /3+
|
||||
return CPUTYPE_POWER3;
|
||||
break;
|
||||
case 0x39:
|
||||
case 0x3c:
|
||||
case 0x44:
|
||||
case 0x45:
|
||||
return CPUTYPE_PPC970;
|
||||
break;
|
||||
case 0x70:
|
||||
return CPUTYPE_CELL;
|
||||
break;
|
||||
case 0x8003:
|
||||
return CPUTYPE_PPCG4;
|
||||
break;
|
||||
default:
|
||||
return CPUTYPE_UNKNOWN;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void get_architecture(void){
|
||||
|
||||
@@ -49,7 +49,6 @@ void get_subdirname(void){
|
||||
}
|
||||
|
||||
void get_cpuconfig(void){
|
||||
printf("#define SPARC\n");
|
||||
printf("#define V9\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 32\n");
|
||||
}
|
||||
@@ -57,8 +56,3 @@ void get_cpuconfig(void){
|
||||
void get_libname(void){
|
||||
printf("v9\n");
|
||||
}
|
||||
|
||||
char *get_corename(void){
|
||||
return "sparc";
|
||||
}
|
||||
|
||||
|
||||
177
cpuid_x86.c
177
cpuid_x86.c
@@ -50,8 +50,6 @@
|
||||
#ifdef NO_AVX
|
||||
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
|
||||
#define CORE_HASWELL CORE_NEHALEM
|
||||
#define CPUTYPE_SKYLAKEX CPUTYPE_NEHALEM
|
||||
#define CORE_SKYLAKEX CORE_NEHALEM
|
||||
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
|
||||
#define CORE_SANDYBRIDGE CORE_NEHALEM
|
||||
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
|
||||
@@ -73,23 +71,12 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
|
||||
*edx = cpuInfo[3];
|
||||
}
|
||||
|
||||
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx)
|
||||
{
|
||||
int cpuInfo[4] = {-1};
|
||||
__cpuidex(cpuInfo, op, count);
|
||||
*eax = cpuInfo[0];
|
||||
*ebx = cpuInfo[1];
|
||||
*ecx = cpuInfo[2];
|
||||
*edx = cpuInfo[3];
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#ifndef CPUIDEMU
|
||||
|
||||
#if defined(__APPLE__) && defined(__i386__)
|
||||
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
|
||||
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx);
|
||||
#else
|
||||
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
@@ -103,19 +90,6 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
|
||||
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
|
||||
#endif
|
||||
}
|
||||
|
||||
static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
__asm__ __volatile__
|
||||
("mov %%ebx, %%edi;"
|
||||
"cpuid;"
|
||||
"xchgl %%ebx, %%edi;"
|
||||
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
|
||||
#else
|
||||
__asm__ __volatile__
|
||||
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#else
|
||||
@@ -159,10 +133,6 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
|
||||
*edx = idlist[current].d;
|
||||
}
|
||||
|
||||
void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
|
||||
return cpuid (op, eax, ebx, ecx, edx);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // _MSC_VER
|
||||
@@ -342,9 +312,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
cpuid(0, &cpuid_level, &ebx, &ecx, &edx);
|
||||
|
||||
if (cpuid_level > 1) {
|
||||
int numcalls =0 ;
|
||||
|
||||
cpuid(2, &eax, &ebx, &ecx, &edx);
|
||||
numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries
|
||||
|
||||
info[ 0] = BITMASK(eax, 8, 0xff);
|
||||
info[ 1] = BITMASK(eax, 16, 0xff);
|
||||
info[ 2] = BITMASK(eax, 24, 0xff);
|
||||
@@ -365,6 +335,7 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
info[14] = BITMASK(edx, 24, 0xff);
|
||||
|
||||
for (i = 0; i < 15; i++){
|
||||
|
||||
switch (info[i]){
|
||||
|
||||
/* This table is from http://www.sandpile.org/ia32/cpuid.htm */
|
||||
@@ -666,13 +637,12 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
LD1.linesize = 64;
|
||||
break;
|
||||
case 0x63 :
|
||||
DTB.size = 2048;
|
||||
DTB.associative = 4;
|
||||
DTB.linesize = 32;
|
||||
LDTB.size = 4096;
|
||||
LDTB.associative= 4;
|
||||
LDTB.linesize = 32;
|
||||
break;
|
||||
DTB.size = 2048;
|
||||
DTB.associative = 4;
|
||||
DTB.linesize = 32;
|
||||
LDTB.size = 4096;
|
||||
LDTB.associative= 4;
|
||||
LDTB.linesize = 32;
|
||||
case 0x66 :
|
||||
LD1.size = 8;
|
||||
LD1.associative = 4;
|
||||
@@ -705,13 +675,12 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
LC1.associative = 8;
|
||||
break;
|
||||
case 0x76 :
|
||||
ITB.size = 2048;
|
||||
ITB.associative = 0;
|
||||
ITB.linesize = 8;
|
||||
LITB.size = 4096;
|
||||
LITB.associative= 0;
|
||||
LITB.linesize = 8;
|
||||
break;
|
||||
ITB.size = 2048;
|
||||
ITB.associative = 0;
|
||||
ITB.linesize = 8;
|
||||
LITB.size = 4096;
|
||||
LITB.associative= 0;
|
||||
LITB.linesize = 8;
|
||||
case 0x77 :
|
||||
LC1.size = 16;
|
||||
LC1.associative = 4;
|
||||
@@ -922,67 +891,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
}
|
||||
|
||||
if (get_vendor() == VENDOR_INTEL) {
|
||||
if(LD1.size<=0 || LC1.size<=0){
|
||||
//If we didn't detect L1 correctly before,
|
||||
int count;
|
||||
for (count=0;count <4;count++) {
|
||||
cpuid_count(4, count, &eax, &ebx, &ecx, &edx);
|
||||
switch (eax &0x1f) {
|
||||
case 0:
|
||||
continue;
|
||||
case 1:
|
||||
case 3:
|
||||
{
|
||||
switch ((eax >>5) &0x07)
|
||||
{
|
||||
case 1:
|
||||
{
|
||||
// fprintf(stderr,"L1 data cache...\n");
|
||||
int sets = ecx+1;
|
||||
int lines = (ebx & 0x0fff) +1;
|
||||
ebx>>=12;
|
||||
int part = (ebx&0x03ff)+1;
|
||||
ebx >>=10;
|
||||
int assoc = (ebx&0x03ff)+1;
|
||||
LD1.size = (assoc*part*lines*sets)/1024;
|
||||
LD1.associative = assoc;
|
||||
LD1.linesize= lines;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 2:
|
||||
{
|
||||
switch ((eax >>5) &0x07)
|
||||
{
|
||||
case 1:
|
||||
{
|
||||
// fprintf(stderr,"L1 instruction cache...\n");
|
||||
int sets = ecx+1;
|
||||
int lines = (ebx & 0x0fff) +1;
|
||||
ebx>>=12;
|
||||
int part = (ebx&0x03ff)+1;
|
||||
ebx >>=10;
|
||||
int assoc = (ebx&0x03ff)+1;
|
||||
LC1.size = (assoc*part*lines*sets)/1024;
|
||||
LC1.associative = assoc;
|
||||
LC1.linesize= lines;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
|
||||
if (cpuid_level >= 0x80000006) {
|
||||
if(L2.size<=0){
|
||||
@@ -1301,19 +1209,6 @@ int get_cpuname(void){
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 5:
|
||||
// Skylake X
|
||||
#ifndef NO_AVX512
|
||||
return CPUTYPE_SKYLAKEX;
|
||||
#else
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
#endif
|
||||
case 14:
|
||||
// Skylake
|
||||
if(support_avx())
|
||||
@@ -1339,23 +1234,6 @@ int get_cpuname(void){
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
switch (model) {
|
||||
case 6: // Cannon Lake
|
||||
#ifndef NO_AVX512
|
||||
return CPUTYPE_SKYLAKEX;
|
||||
#else
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
#endif
|
||||
}
|
||||
break;
|
||||
case 9:
|
||||
case 8:
|
||||
switch (model) {
|
||||
@@ -1452,8 +1330,6 @@ int get_cpuname(void){
|
||||
switch (model) {
|
||||
case 1:
|
||||
// AMD Ryzen
|
||||
case 8:
|
||||
// AMD Ryzen2
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_ZEN;
|
||||
@@ -1590,7 +1466,6 @@ static char *cpuname[] = {
|
||||
"STEAMROLLER",
|
||||
"EXCAVATOR",
|
||||
"ZEN",
|
||||
"SKYLAKEX"
|
||||
};
|
||||
|
||||
static char *lowercpuname[] = {
|
||||
@@ -1645,11 +1520,10 @@ static char *lowercpuname[] = {
|
||||
"steamroller",
|
||||
"excavator",
|
||||
"zen",
|
||||
"skylakex"
|
||||
};
|
||||
|
||||
static char *corename[] = {
|
||||
"UNKNOWN",
|
||||
"UNKOWN",
|
||||
"80486",
|
||||
"P5",
|
||||
"P6",
|
||||
@@ -1677,7 +1551,6 @@ static char *corename[] = {
|
||||
"STEAMROLLER",
|
||||
"EXCAVATOR",
|
||||
"ZEN",
|
||||
"SKYLAKEX"
|
||||
};
|
||||
|
||||
static char *corename_lower[] = {
|
||||
@@ -1709,7 +1582,6 @@ static char *corename_lower[] = {
|
||||
"steamroller",
|
||||
"excavator",
|
||||
"zen",
|
||||
"skylakex"
|
||||
};
|
||||
|
||||
|
||||
@@ -1815,8 +1687,6 @@ int get_coretype(void){
|
||||
break;
|
||||
case 3:
|
||||
switch (model) {
|
||||
case 7:
|
||||
return CORE_ATOM;
|
||||
case 10:
|
||||
case 14:
|
||||
if(support_avx())
|
||||
@@ -1898,19 +1768,6 @@ int get_coretype(void){
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
case 5:
|
||||
// Skylake X
|
||||
#ifndef NO_AVX512
|
||||
return CORE_SKYLAKEX;
|
||||
#else
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
#endif
|
||||
case 14:
|
||||
// Skylake
|
||||
if(support_avx())
|
||||
@@ -2009,8 +1866,6 @@ int get_coretype(void){
|
||||
switch (model) {
|
||||
case 1:
|
||||
// AMD Ryzen
|
||||
case 8:
|
||||
// Ryzen 2
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_ZEN;
|
||||
|
||||
@@ -29,18 +29,15 @@
|
||||
|
||||
#define CPU_GENERIC 0
|
||||
#define CPU_Z13 1
|
||||
#define CPU_Z14 2
|
||||
|
||||
static char *cpuname[] = {
|
||||
"ZARCH_GENERIC",
|
||||
"Z13",
|
||||
"Z14"
|
||||
"Z13"
|
||||
};
|
||||
|
||||
static char *cpuname_lower[] = {
|
||||
"zarch_generic",
|
||||
"z13",
|
||||
"z14"
|
||||
"z13"
|
||||
};
|
||||
|
||||
int detect(void)
|
||||
@@ -65,10 +62,6 @@ int detect(void)
|
||||
if (strstr(p, "2964")) return CPU_Z13;
|
||||
if (strstr(p, "2965")) return CPU_Z13;
|
||||
|
||||
/* detect z14, but fall back to z13 */
|
||||
if (strstr(p, "3906")) return CPU_Z13;
|
||||
if (strstr(p, "3907")) return CPU_Z13;
|
||||
|
||||
return CPU_GENERIC;
|
||||
}
|
||||
|
||||
@@ -114,9 +107,5 @@ void get_cpuconfig(void)
|
||||
printf("#define Z13\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
break;
|
||||
case CPU_Z14:
|
||||
printf("#define Z14\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
12
ctest.c
12
ctest.c
@@ -60,14 +60,6 @@ OS_FREEBSD
|
||||
OS_NETBSD
|
||||
#endif
|
||||
|
||||
#if defined(__OpenBSD__)
|
||||
OS_OPENBSD
|
||||
#endif
|
||||
|
||||
#if defined(__DragonFly__)
|
||||
OS_DRAGONFLY
|
||||
#endif
|
||||
|
||||
#if defined(__sun)
|
||||
OS_SUNOS
|
||||
#endif
|
||||
@@ -101,10 +93,6 @@ OS_INTERIX
|
||||
OS_LINUX
|
||||
#endif
|
||||
|
||||
#if defined(__HAIKU__)
|
||||
OS_HAIKU
|
||||
#endif
|
||||
|
||||
#if defined(__i386) || defined(_X86)
|
||||
ARCH_X86
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
include_directories(${PROJECT_SOURCE_DIR})
|
||||
include_directories(${PROJECT_BINARY_DIR})
|
||||
|
||||
enable_language(Fortran)
|
||||
|
||||
@@ -16,7 +15,7 @@ foreach(float_type ${FLOAT_TYPES})
|
||||
add_executable(x${float_char}cblat1
|
||||
c_${float_char}blat1.f
|
||||
c_${float_char}blas1.c)
|
||||
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME})
|
||||
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}_static)
|
||||
add_test(NAME "x${float_char}cblat1"
|
||||
COMMAND "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat1")
|
||||
|
||||
@@ -28,7 +27,7 @@ foreach(float_type ${FLOAT_TYPES})
|
||||
auxiliary.c
|
||||
c_xerbla.c
|
||||
constant.c)
|
||||
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME})
|
||||
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}_static)
|
||||
add_test(NAME "x${float_char}cblat2"
|
||||
COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat2" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in2")
|
||||
|
||||
@@ -40,7 +39,7 @@ foreach(float_type ${FLOAT_TYPES})
|
||||
auxiliary.c
|
||||
c_xerbla.c
|
||||
constant.c)
|
||||
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME})
|
||||
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}_static)
|
||||
add_test(NAME "x${float_char}cblat3"
|
||||
COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat3" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3")
|
||||
|
||||
|
||||
@@ -102,13 +102,7 @@ clean ::
|
||||
rm -f x*
|
||||
|
||||
FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS)
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
CEXTRALIB = -lomp
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
CEXTRALIB =
|
||||
|
||||
# Single real
|
||||
xscblat1: $(stestl1o) c_sblat1.o $(TOPDIR)/$(LIBNAME)
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
#include "common.h"
|
||||
#include "cblas_test.h"
|
||||
|
||||
void F77_caxpy(const int *N, OPENBLAS_CONST void *alpha, void *X,
|
||||
void F77_caxpy(const int *N, const void *alpha, void *X,
|
||||
const int *incX, void *Y, const int *incY)
|
||||
{
|
||||
cblas_caxpy(*N, alpha, X, *incX, Y, *incY);
|
||||
@@ -58,13 +58,13 @@ void F77_cswap( const int *N, void *X, const int *incX,
|
||||
return;
|
||||
}
|
||||
|
||||
int F77_icamax(const int *N, OPENBLAS_CONST void *X, const int *incX)
|
||||
int F77_icamax(const int *N, const void *X, const int *incX)
|
||||
{
|
||||
if (*N < 1 || *incX < 1) return(0);
|
||||
return (cblas_icamax(*N, X, *incX)+1);
|
||||
}
|
||||
|
||||
float F77_scnrm2(const int *N, OPENBLAS_CONST void *X, const int *incX)
|
||||
float F77_scnrm2(const int *N, const void *X, const int *incX)
|
||||
{
|
||||
return cblas_scnrm2(*N, X, *incX);
|
||||
}
|
||||
|
||||
@@ -9,9 +9,9 @@
|
||||
#include "cblas_test.h"
|
||||
|
||||
void F77_cgemv(int *order, char *transp, int *m, int *n,
|
||||
OPENBLAS_CONST void *alpha,
|
||||
CBLAS_TEST_COMPLEX *a, int *lda, OPENBLAS_CONST void *x, int *incx,
|
||||
OPENBLAS_CONST void *beta, void *y, int *incy) {
|
||||
const void *alpha,
|
||||
CBLAS_TEST_COMPLEX *a, int *lda, const void *x, int *incx,
|
||||
const void *beta, void *y, int *incy) {
|
||||
|
||||
CBLAS_TEST_COMPLEX *A;
|
||||
int i,j,LDA;
|
||||
|
||||
@@ -349,13 +349,13 @@
|
||||
CALL CCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
|
||||
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
|
||||
$ 0 )
|
||||
$ 0 )
|
||||
END IF
|
||||
IF (RORDER) THEN
|
||||
CALL CCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
|
||||
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
|
||||
$ 1 )
|
||||
$ 1 )
|
||||
END IF
|
||||
GO TO 200
|
||||
* Test CGERC, 12, CGERU, 13.
|
||||
@@ -2660,7 +2660,7 @@
|
||||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
C 60 CONTINUE
|
||||
60 CONTINUE
|
||||
LCERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
||||
@@ -329,13 +329,13 @@
|
||||
CALL CCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB,
|
||||
$ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C,
|
||||
$ 0 )
|
||||
$ 0 )
|
||||
END IF
|
||||
IF (RORDER) THEN
|
||||
CALL CCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB,
|
||||
$ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C,
|
||||
$ 1 )
|
||||
$ 1 )
|
||||
END IF
|
||||
GO TO 190
|
||||
* Test CHERK, 06, CSYRK, 07.
|
||||
@@ -357,13 +357,13 @@
|
||||
CALL CCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET,
|
||||
$ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
|
||||
$ 0 )
|
||||
$ 0 )
|
||||
END IF
|
||||
IF (RORDER) THEN
|
||||
CALL CCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET,
|
||||
$ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
|
||||
$ 1 )
|
||||
$ 1 )
|
||||
END IF
|
||||
GO TO 190
|
||||
*
|
||||
@@ -707,9 +707,9 @@
|
||||
9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
C 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
|
||||
C $ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3,
|
||||
C $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
|
||||
9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
|
||||
$ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3,
|
||||
$ ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
|
||||
9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -1033,9 +1033,9 @@ C $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
|
||||
9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
C 9995 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
|
||||
C $ ',', F4.1, '), C,', I3, ') .' )
|
||||
9995 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
|
||||
$ ',', F4.1, '), C,', I3, ') .' )
|
||||
9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -1385,9 +1385,9 @@ C $ ',', F4.1, '), C,', I3, ') .' )
|
||||
9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT(' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
C 9995 FORMAT(1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ',
|
||||
C $ ' .' )
|
||||
9995 FORMAT(1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ',
|
||||
$ ' .' )
|
||||
9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -1768,12 +1768,12 @@ C $ ' .' )
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
C 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ',
|
||||
C $ ' .' )
|
||||
C 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1,
|
||||
C $ '), C,', I3, ') .' )
|
||||
9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ',
|
||||
$ ' .' )
|
||||
9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1,
|
||||
$ '), C,', I3, ') .' )
|
||||
9992 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -2221,12 +2221,12 @@ C $ '), C,', I3, ') .' )
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
C 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1,
|
||||
C $ ', C,', I3, ') .' )
|
||||
C 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
|
||||
C $ ',', F4.1, '), C,', I3, ') .' )
|
||||
9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1,
|
||||
$ ', C,', I3, ') .' )
|
||||
9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
|
||||
$ ',', F4.1, '), C,', I3, ') .' )
|
||||
9992 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -2702,7 +2702,7 @@ C $ ',', F4.1, '), C,', I3, ') .' )
|
||||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
C 60 CONTINUE
|
||||
60 CONTINUE
|
||||
LCERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
||||
@@ -14,7 +14,7 @@ double F77_dasum(const int *N, double *X, const int *incX)
|
||||
return cblas_dasum(*N, X, *incX);
|
||||
}
|
||||
|
||||
void F77_daxpy(const int *N, const double *alpha, OPENBLAS_CONST double *X,
|
||||
void F77_daxpy(const int *N, const double *alpha, const double *X,
|
||||
const int *incX, double *Y, const int *incY)
|
||||
{
|
||||
cblas_daxpy(*N, *alpha, X, *incX, Y, *incY);
|
||||
@@ -28,13 +28,13 @@ void F77_dcopy(const int *N, double *X, const int *incX,
|
||||
return;
|
||||
}
|
||||
|
||||
double F77_ddot(const int *N, OPENBLAS_CONST double *X, const int *incX,
|
||||
OPENBLAS_CONST double *Y, const int *incY)
|
||||
double F77_ddot(const int *N, const double *X, const int *incX,
|
||||
const double *Y, const int *incY)
|
||||
{
|
||||
return cblas_ddot(*N, X, *incX, Y, *incY);
|
||||
}
|
||||
|
||||
double F77_dnrm2(const int *N, OPENBLAS_CONST double *X, const int *incX)
|
||||
double F77_dnrm2(const int *N, const double *X, const int *incX)
|
||||
{
|
||||
return cblas_dnrm2(*N, X, *incX);
|
||||
}
|
||||
@@ -72,12 +72,12 @@ double F77_dzasum(const int *N, void *X, const int *incX)
|
||||
return cblas_dzasum(*N, X, *incX);
|
||||
}
|
||||
|
||||
double F77_dznrm2(const int *N, OPENBLAS_CONST void *X, const int *incX)
|
||||
double F77_dznrm2(const int *N, const void *X, const int *incX)
|
||||
{
|
||||
return cblas_dznrm2(*N, X, *incX);
|
||||
}
|
||||
|
||||
int F77_idamax(const int *N, OPENBLAS_CONST double *X, const int *incX)
|
||||
int F77_idamax(const int *N, const double *X, const int *incX)
|
||||
{
|
||||
if (*N < 1 || *incX < 1) return(0);
|
||||
return (cblas_idamax(*N, X, *incX)+1);
|
||||
|
||||
@@ -211,11 +211,11 @@
|
||||
IF (ICASE.EQ.7) THEN
|
||||
* .. DNRM2TEST ..
|
||||
STEMP(1) = DTRUE1(NP1)
|
||||
CALL STEST1(DNRM2TEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
|
||||
CALL STEST1(DNRM2TEST(N,SX,INCX),STEMP,STEMP,SFAC)
|
||||
ELSE IF (ICASE.EQ.8) THEN
|
||||
* .. DASUMTEST ..
|
||||
STEMP(1) = DTRUE3(NP1)
|
||||
CALL STEST1(DASUMTEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
|
||||
CALL STEST1(DASUMTEST(N,SX,INCX),STEMP,STEMP,SFAC)
|
||||
ELSE IF (ICASE.EQ.9) THEN
|
||||
* .. DSCALTEST ..
|
||||
CALL DSCALTEST(N,SA((INCX-1)*5+NP1),SX,INCX)
|
||||
|
||||
@@ -345,13 +345,13 @@
|
||||
CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
|
||||
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
|
||||
$ 0 )
|
||||
$ 0 )
|
||||
END IF
|
||||
IF (RORDER) THEN
|
||||
CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
|
||||
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
|
||||
$ 1 )
|
||||
$ 1 )
|
||||
END IF
|
||||
GO TO 200
|
||||
* Test DGER, 12.
|
||||
@@ -797,9 +797,9 @@
|
||||
$ ' (', I6, ' CALL', 'S)' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
C $ ' - SUSPECT *******' )
|
||||
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
$ ' - SUSPECT *******' )
|
||||
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', 4( I3, ',' ), F4.1,
|
||||
$ ', A,', I3, ',',/ 10x,'X,', I2, ',', F4.1, ', Y,',
|
||||
@@ -1004,7 +1004,7 @@ C $ ' - SUSPECT *******' )
|
||||
$ REWIND NTRA
|
||||
CALL CDSBMV( IORDER, UPLO, N, K, ALPHA,
|
||||
$ AA, LDA, XX, INCX, BETA, YY,
|
||||
$ INCY )
|
||||
$ INCY )
|
||||
ELSE IF( PACKED )THEN
|
||||
IF( TRACE )
|
||||
$ WRITE( NTRA, FMT = 9995 )NC, SNAME,
|
||||
@@ -1156,9 +1156,9 @@ C $ ' - SUSPECT *******' )
|
||||
$ ' (', I6, ' CALL', 'S)' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
C $ ' - SUSPECT *******' )
|
||||
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
$ ' - SUSPECT *******' )
|
||||
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', AP',
|
||||
$ ', X,', I2, ',', F4.1, ', Y,', I2, ') .' )
|
||||
@@ -1191,7 +1191,7 @@ C $ ' - SUSPECT *******' )
|
||||
* .. Scalar Arguments ..
|
||||
DOUBLE PRECISION EPS, THRESH
|
||||
INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA,
|
||||
$ IORDER
|
||||
$ IORDER
|
||||
LOGICAL FATAL, REWI, TRACE
|
||||
CHARACTER*12 SNAME
|
||||
* .. Array Arguments ..
|
||||
@@ -1216,7 +1216,7 @@ C $ ' - SUSPECT *******' )
|
||||
EXTERNAL LDE, LDERES
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL DMAKE, DMVCH, CDTBMV, CDTBSV, CDTPMV,
|
||||
$ CDTPSV, CDTRMV, CDTRSV
|
||||
$ CDTPSV, CDTRMV, CDTRSV
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC ABS, MAX
|
||||
* .. Scalars in Common ..
|
||||
@@ -1544,9 +1544,9 @@ C $ ' - SUSPECT *******' )
|
||||
$ ' (', I6, ' CALL', 'S)' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
C $ ' - SUSPECT *******' )
|
||||
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
$ ' - SUSPECT *******' )
|
||||
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( 1X, I6, ': ',A12, '(', 3( A14,',' ),/ 10x, I3, ', AP, ',
|
||||
$ 'X,', I2, ') .' )
|
||||
@@ -1579,7 +1579,7 @@ C $ ' - SUSPECT *******' )
|
||||
* .. Scalar Arguments ..
|
||||
DOUBLE PRECISION EPS, THRESH
|
||||
INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA,
|
||||
$ IORDER
|
||||
$ IORDER
|
||||
LOGICAL FATAL, REWI, TRACE
|
||||
CHARACTER*12 SNAME
|
||||
* .. Array Arguments ..
|
||||
@@ -1819,9 +1819,9 @@ C $ ' - SUSPECT *******' )
|
||||
$ ' (', I6, ' CALL', 'S)' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
C $ ' - SUSPECT *******' )
|
||||
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
$ ' - SUSPECT *******' )
|
||||
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
9994 FORMAT( 1X, I6, ': ',A12, '(', 2( I3, ',' ), F4.1, ', X,', I2,
|
||||
@@ -1851,7 +1851,7 @@ C $ ' - SUSPECT *******' )
|
||||
* .. Scalar Arguments ..
|
||||
DOUBLE PRECISION EPS, THRESH
|
||||
INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA,
|
||||
$ IORDER
|
||||
$ IORDER
|
||||
LOGICAL FATAL, REWI, TRACE
|
||||
CHARACTER*12 SNAME
|
||||
* .. Array Arguments ..
|
||||
@@ -1973,7 +1973,7 @@ C $ ' - SUSPECT *******' )
|
||||
IF( REWI )
|
||||
$ REWIND NTRA
|
||||
CALL CDSYR( IORDER, UPLO, N, ALPHA, XX, INCX,
|
||||
$ AA, LDA )
|
||||
$ AA, LDA )
|
||||
ELSE IF( PACKED )THEN
|
||||
IF( TRACE )
|
||||
$ WRITE( NTRA, FMT = 9994 )NC, SNAME, CUPLO, N,
|
||||
@@ -2113,9 +2113,9 @@ C $ ' - SUSPECT *******' )
|
||||
$ ' (', I6, ' CALL', 'S)' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
C $ ' - SUSPECT *******' )
|
||||
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
$ ' - SUSPECT *******' )
|
||||
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
9994 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', X,',
|
||||
@@ -2147,7 +2147,7 @@ C $ ' - SUSPECT *******' )
|
||||
* .. Scalar Arguments ..
|
||||
DOUBLE PRECISION EPS, THRESH
|
||||
INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA,
|
||||
$ IORDER
|
||||
$ IORDER
|
||||
LOGICAL FATAL, REWI, TRACE
|
||||
CHARACTER*12 SNAME
|
||||
* .. Array Arguments ..
|
||||
@@ -2445,9 +2445,9 @@ C $ ' - SUSPECT *******' )
|
||||
$ ' (', I6, ' CALL', 'S)' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
C $ ' - SUSPECT *******' )
|
||||
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
$ ' - SUSPECT *******' )
|
||||
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
9994 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', X,',
|
||||
@@ -2833,7 +2833,7 @@ C $ ' - SUSPECT *******' )
|
||||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
C 60 CONTINUE
|
||||
60 CONTINUE
|
||||
LDERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
||||
@@ -56,7 +56,7 @@
|
||||
* .. Local Scalars ..
|
||||
DOUBLE PRECISION EPS, ERR, THRESH
|
||||
INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NTRA,
|
||||
$ LAYOUT
|
||||
$ LAYOUT
|
||||
LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE,
|
||||
$ TSTERR, CORDER, RORDER
|
||||
CHARACTER*1 TRANSA, TRANSB
|
||||
@@ -78,7 +78,7 @@
|
||||
EXTERNAL DDIFF, LDE
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL DCHK1, DCHK2, DCHK3, DCHK4, DCHK5, CD3CHKE,
|
||||
$ DMMCH
|
||||
$ DMMCH
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX, MIN
|
||||
* .. Scalars in Common ..
|
||||
@@ -323,13 +323,13 @@
|
||||
CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB,
|
||||
$ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C,
|
||||
$ 0 )
|
||||
$ 0 )
|
||||
END IF
|
||||
IF (RORDER) THEN
|
||||
CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB,
|
||||
$ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C,
|
||||
$ 1 )
|
||||
$ 1 )
|
||||
END IF
|
||||
GO TO 190
|
||||
* Test DSYRK, 05.
|
||||
@@ -351,13 +351,13 @@
|
||||
CALL DCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET,
|
||||
$ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
|
||||
$ 0 )
|
||||
$ 0 )
|
||||
END IF
|
||||
IF (RORDER) THEN
|
||||
CALL DCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET,
|
||||
$ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
|
||||
$ 1 )
|
||||
$ 1 )
|
||||
END IF
|
||||
GO TO 190
|
||||
*
|
||||
@@ -588,7 +588,7 @@
|
||||
$ REWIND NTRA
|
||||
CALL CDGEMM( IORDER, TRANSA, TRANSB, M, N,
|
||||
$ K, ALPHA, AA, LDA, BB, LDB,
|
||||
$ BETA, CC, LDC )
|
||||
$ BETA, CC, LDC )
|
||||
*
|
||||
* Check if error-exit was taken incorrectly.
|
||||
*
|
||||
@@ -694,9 +694,9 @@
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
C 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
|
||||
C $ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ',
|
||||
C $ 'C,', I3, ').' )
|
||||
9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
|
||||
$ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ',
|
||||
$ 'C,', I3, ').' )
|
||||
9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -1007,9 +1007,9 @@ C $ 'C,', I3, ').' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
C 9995 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
|
||||
C $ ' .' )
|
||||
9995 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
|
||||
$ ' .' )
|
||||
9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -1201,7 +1201,7 @@ C $ ' .' )
|
||||
$ REWIND NTRA
|
||||
CALL CDTRMM( IORDER, SIDE, UPLO, TRANSA,
|
||||
$ DIAG, M, N, ALPHA, AA, LDA,
|
||||
$ BB, LDB )
|
||||
$ BB, LDB )
|
||||
ELSE IF( SNAME( 10: 11 ).EQ.'sm' )THEN
|
||||
IF( TRACE )
|
||||
$ CALL DPRCN3( NTRA, NC, SNAME, IORDER,
|
||||
@@ -1211,7 +1211,7 @@ C $ ' .' )
|
||||
$ REWIND NTRA
|
||||
CALL CDTRSM( IORDER, SIDE, UPLO, TRANSA,
|
||||
$ DIAG, M, N, ALPHA, AA, LDA,
|
||||
$ BB, LDB )
|
||||
$ BB, LDB )
|
||||
END IF
|
||||
*
|
||||
* Check if error-exit was taken incorrectly.
|
||||
@@ -1355,8 +1355,8 @@ C $ ' .' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
C 9995 FORMAT( 1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ F4.1, ', A,', I3, ', B,', I3, ') .' )
|
||||
9995 FORMAT( 1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ F4.1, ', A,', I3, ', B,', I3, ') .' )
|
||||
9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -1681,8 +1681,8 @@ C $ F4.1, ', A,', I3, ', B,', I3, ') .' )
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
C 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
|
||||
9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
|
||||
9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -1726,7 +1726,7 @@ C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
|
||||
SUBROUTINE DCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI,
|
||||
$ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX,
|
||||
$ AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
|
||||
$ IORDER )
|
||||
$ IORDER )
|
||||
*
|
||||
* Tests DSYR2K.
|
||||
*
|
||||
@@ -1888,7 +1888,7 @@ C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
|
||||
$ REWIND NTRA
|
||||
CALL CDSYR2K( IORDER, UPLO, TRANS, N, K,
|
||||
$ ALPHA, AA, LDA, BB, LDB, BETA,
|
||||
$ CC, LDC )
|
||||
$ CC, LDC )
|
||||
*
|
||||
* Check if error-exit was taken incorrectly.
|
||||
*
|
||||
@@ -2037,9 +2037,9 @@ C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
C 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
|
||||
C $ ' .' )
|
||||
9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
|
||||
$ ' .' )
|
||||
9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -2399,7 +2399,7 @@ C $ ' .' )
|
||||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
C 60 CONTINUE
|
||||
60 CONTINUE
|
||||
LDERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
||||
@@ -14,7 +14,7 @@ float F77_sasum(blasint *N, float *X, blasint *incX)
|
||||
return cblas_sasum(*N, X, *incX);
|
||||
}
|
||||
|
||||
void F77_saxpy(blasint *N, const float *alpha, OPENBLAS_CONST float *X,
|
||||
void F77_saxpy(blasint *N, const float *alpha, const float *X,
|
||||
blasint *incX, float *Y, blasint *incY)
|
||||
{
|
||||
cblas_saxpy(*N, *alpha, X, *incX, Y, *incY);
|
||||
@@ -26,25 +26,25 @@ float F77_scasum(blasint *N, float *X, blasint *incX)
|
||||
return cblas_scasum(*N, X, *incX);
|
||||
}
|
||||
|
||||
float F77_scnrm2(blasint *N, OPENBLAS_CONST float *X, blasint *incX)
|
||||
float F77_scnrm2(blasint *N, const float *X, blasint *incX)
|
||||
{
|
||||
return cblas_scnrm2(*N, X, *incX);
|
||||
}
|
||||
|
||||
void F77_scopy(blasint *N, OPENBLAS_CONST float *X, blasint *incX,
|
||||
void F77_scopy(blasint *N, const float *X, blasint *incX,
|
||||
float *Y, blasint *incY)
|
||||
{
|
||||
cblas_scopy(*N, X, *incX, Y, *incY);
|
||||
return;
|
||||
}
|
||||
|
||||
float F77_sdot(blasint *N, OPENBLAS_CONST float *X, blasint *incX,
|
||||
OPENBLAS_CONST float *Y, blasint *incY)
|
||||
float F77_sdot(blasint *N, const float *X, blasint *incX,
|
||||
const float *Y, blasint *incY)
|
||||
{
|
||||
return cblas_sdot(*N, X, *incX, Y, *incY);
|
||||
}
|
||||
|
||||
float F77_snrm2(blasint *N, OPENBLAS_CONST float *X, blasint *incX)
|
||||
float F77_snrm2(blasint *N, const float *X, blasint *incX)
|
||||
{
|
||||
return cblas_snrm2(*N, X, *incX);
|
||||
}
|
||||
@@ -76,7 +76,7 @@ void F77_sswap( blasint *N, float *X, blasint *incX,
|
||||
return;
|
||||
}
|
||||
|
||||
int F77_isamax(blasint *N, OPENBLAS_CONST float *X, blasint *incX)
|
||||
int F77_isamax(blasint *N, const float *X, blasint *incX)
|
||||
{
|
||||
if (*N < 1 || *incX < 1) return(0);
|
||||
return (cblas_isamax(*N, X, *incX)+1);
|
||||
|
||||
@@ -211,11 +211,11 @@
|
||||
IF (ICASE.EQ.7) THEN
|
||||
* .. SNRM2TEST ..
|
||||
STEMP(1) = DTRUE1(NP1)
|
||||
CALL STEST1(SNRM2TEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
|
||||
CALL STEST1(SNRM2TEST(N,SX,INCX),STEMP,STEMP,SFAC)
|
||||
ELSE IF (ICASE.EQ.8) THEN
|
||||
* .. SASUMTEST ..
|
||||
STEMP(1) = DTRUE3(NP1)
|
||||
CALL STEST1(SASUMTEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
|
||||
CALL STEST1(SASUMTEST(N,SX,INCX),STEMP,STEMP,SFAC)
|
||||
ELSE IF (ICASE.EQ.9) THEN
|
||||
* .. SSCALTEST ..
|
||||
CALL SSCALTEST(N,SA((INCX-1)*5+NP1),SX,INCX)
|
||||
|
||||
@@ -345,13 +345,13 @@
|
||||
CALL SCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
|
||||
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
|
||||
$ 0 )
|
||||
$ 0 )
|
||||
END IF
|
||||
IF (RORDER) THEN
|
||||
CALL SCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
|
||||
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
|
||||
$ 1 )
|
||||
$ 1 )
|
||||
END IF
|
||||
GO TO 200
|
||||
* Test SGER, 12.
|
||||
@@ -797,9 +797,9 @@
|
||||
$ ' (', I6, ' CALL', 'S)' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
C $ ' - SUSPECT *******' )
|
||||
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
$ ' - SUSPECT *******' )
|
||||
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', 4( I3, ',' ), F4.1,
|
||||
$ ', A,', I3, ',',/ 10x, 'X,', I2, ',', F4.1, ', Y,',
|
||||
@@ -1004,7 +1004,7 @@ C $ ' - SUSPECT *******' )
|
||||
$ REWIND NTRA
|
||||
CALL CSSBMV( IORDER, UPLO, N, K, ALPHA,
|
||||
$ AA, LDA, XX, INCX, BETA, YY,
|
||||
$ INCY )
|
||||
$ INCY )
|
||||
ELSE IF( PACKED )THEN
|
||||
IF( TRACE )
|
||||
$ WRITE( NTRA, FMT = 9995 )NC, SNAME,
|
||||
@@ -1156,9 +1156,9 @@ C $ ' - SUSPECT *******' )
|
||||
$ ' (', I6, ' CALL', 'S)' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
C $ ' - SUSPECT *******' )
|
||||
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
$ ' - SUSPECT *******' )
|
||||
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', AP',
|
||||
$ ', X,', I2, ',', F4.1, ', Y,', I2, ') .' )
|
||||
@@ -1191,7 +1191,7 @@ C $ ' - SUSPECT *******' )
|
||||
* .. Scalar Arguments ..
|
||||
REAL EPS, THRESH
|
||||
INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA,
|
||||
$ IORDER
|
||||
$ IORDER
|
||||
LOGICAL FATAL, REWI, TRACE
|
||||
CHARACTER*12 SNAME
|
||||
* .. Array Arguments ..
|
||||
@@ -1216,7 +1216,7 @@ C $ ' - SUSPECT *******' )
|
||||
EXTERNAL LSE, LSERES
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL SMAKE, SMVCH, CSTBMV, CSTBSV, CSTPMV,
|
||||
$ CSTPSV, CSTRMV, CSTRSV
|
||||
$ CSTPSV, CSTRMV, CSTRSV
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC ABS, MAX
|
||||
* .. Scalars in Common ..
|
||||
@@ -1544,9 +1544,9 @@ C $ ' - SUSPECT *******' )
|
||||
$ ' (', I6, ' CALL', 'S)' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
C $ ' - SUSPECT *******' )
|
||||
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
$ ' - SUSPECT *******' )
|
||||
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( 1X, I6, ': ',A12, '(', 3( A14,',' ),/ 10x, I3, ', AP, ',
|
||||
$ 'X,', I2, ') .' )
|
||||
@@ -1579,7 +1579,7 @@ C $ ' - SUSPECT *******' )
|
||||
* .. Scalar Arguments ..
|
||||
REAL EPS, THRESH
|
||||
INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA,
|
||||
$ IORDER
|
||||
$ IORDER
|
||||
LOGICAL FATAL, REWI, TRACE
|
||||
CHARACTER*12 SNAME
|
||||
* .. Array Arguments ..
|
||||
@@ -1819,9 +1819,9 @@ C $ ' - SUSPECT *******' )
|
||||
$ ' (', I6, ' CALL', 'S)' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
C $ ' - SUSPECT *******' )
|
||||
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
$ ' - SUSPECT *******' )
|
||||
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
9994 FORMAT( 1X, I6, ': ',A12, '(', 2( I3, ',' ), F4.1, ', X,', I2,
|
||||
@@ -1851,7 +1851,7 @@ C $ ' - SUSPECT *******' )
|
||||
* .. Scalar Arguments ..
|
||||
REAL EPS, THRESH
|
||||
INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA,
|
||||
$ IORDER
|
||||
$ IORDER
|
||||
LOGICAL FATAL, REWI, TRACE
|
||||
CHARACTER*12 SNAME
|
||||
* .. Array Arguments ..
|
||||
@@ -1973,7 +1973,7 @@ C $ ' - SUSPECT *******' )
|
||||
IF( REWI )
|
||||
$ REWIND NTRA
|
||||
CALL CSSYR( IORDER, UPLO, N, ALPHA, XX, INCX,
|
||||
$ AA, LDA )
|
||||
$ AA, LDA )
|
||||
ELSE IF( PACKED )THEN
|
||||
IF( TRACE )
|
||||
$ WRITE( NTRA, FMT = 9994 )NC, SNAME, CUPLO, N,
|
||||
@@ -2113,9 +2113,9 @@ C $ ' - SUSPECT *******' )
|
||||
$ ' (', I6, ' CALL', 'S)' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
C $ ' - SUSPECT *******' )
|
||||
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
$ ' - SUSPECT *******' )
|
||||
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
9994 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', X,',
|
||||
@@ -2147,7 +2147,7 @@ C $ ' - SUSPECT *******' )
|
||||
* .. Scalar Arguments ..
|
||||
REAL EPS, THRESH
|
||||
INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA,
|
||||
$ IORDER
|
||||
$ IORDER
|
||||
LOGICAL FATAL, REWI, TRACE
|
||||
CHARACTER*12 SNAME
|
||||
* .. Array Arguments ..
|
||||
@@ -2445,9 +2445,9 @@ C $ ' - SUSPECT *******' )
|
||||
$ ' (', I6, ' CALL', 'S)' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
C $ ' - SUSPECT *******' )
|
||||
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
|
||||
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
|
||||
$ ' - SUSPECT *******' )
|
||||
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
9994 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', X,',
|
||||
@@ -2833,7 +2833,7 @@ C $ ' - SUSPECT *******' )
|
||||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
C 60 CONTINUE
|
||||
60 CONTINUE
|
||||
LSERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
||||
@@ -694,9 +694,9 @@
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
C 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
|
||||
C $ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ',
|
||||
C $ 'C,', I3, ').' )
|
||||
9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
|
||||
$ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ',
|
||||
$ 'C,', I3, ').' )
|
||||
9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -1011,9 +1011,9 @@ C $ 'C,', I3, ').' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
C 9995 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
|
||||
C $ ' .' )
|
||||
9995 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
|
||||
$ ' .' )
|
||||
9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -1359,8 +1359,8 @@ C $ ' .' )
|
||||
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
C 9995 FORMAT( 1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ F4.1, ', A,', I3, ', B,', I3, ') .' )
|
||||
9995 FORMAT( 1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ F4.1, ', A,', I3, ', B,', I3, ') .' )
|
||||
9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -1686,8 +1686,8 @@ C $ F4.1, ', A,', I3, ', B,', I3, ') .' )
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
C 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
|
||||
9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
|
||||
9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -2041,9 +2041,9 @@ C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
C 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
|
||||
C $ ' .' )
|
||||
9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
|
||||
$ ' .' )
|
||||
9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -2403,7 +2403,7 @@ C $ ' .' )
|
||||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
C 60 CONTINUE
|
||||
60 CONTINUE
|
||||
LSERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
||||
@@ -131,7 +131,7 @@ void F77_xerbla(char *srname, void *vinfo)
|
||||
int BLASFUNC(xerbla)(char *name, blasint *info, blasint length) {
|
||||
|
||||
F77_xerbla(name, info);
|
||||
return 0;
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
#include "common.h"
|
||||
#include "cblas_test.h"
|
||||
|
||||
void F77_zaxpy(const int *N, OPENBLAS_CONST void *alpha, void *X,
|
||||
void F77_zaxpy(const int *N, const void *alpha, void *X,
|
||||
const int *incX, void *Y, const int *incY)
|
||||
{
|
||||
cblas_zaxpy(*N, alpha, X, *incX, Y, *incY);
|
||||
@@ -23,8 +23,8 @@ void F77_zcopy(const int *N, void *X, const int *incX,
|
||||
return;
|
||||
}
|
||||
|
||||
void F77_zdotc(const int *N, OPENBLAS_CONST void *X, const int *incX,
|
||||
OPENBLAS_CONST void *Y, const int *incY,void *dotc)
|
||||
void F77_zdotc(const int *N, const void *X, const int *incX,
|
||||
const void *Y, const int *incY,void *dotc)
|
||||
{
|
||||
cblas_zdotc_sub(*N, X, *incX, Y, *incY, dotc);
|
||||
return;
|
||||
@@ -58,13 +58,13 @@ void F77_zswap( const int *N, void *X, const int *incX,
|
||||
return;
|
||||
}
|
||||
|
||||
int F77_izamax(const int *N, OPENBLAS_CONST void *X, const int *incX)
|
||||
int F77_izamax(const int *N, const void *X, const int *incX)
|
||||
{
|
||||
if (*N < 1 || *incX < 1) return(0);
|
||||
return(cblas_izamax(*N, X, *incX)+1);
|
||||
}
|
||||
|
||||
double F77_dznrm2(const int *N, OPENBLAS_CONST void *X, const int *incX)
|
||||
double F77_dznrm2(const int *N, const void *X, const int *incX)
|
||||
{
|
||||
return cblas_dznrm2(*N, X, *incX);
|
||||
}
|
||||
|
||||
@@ -9,9 +9,9 @@
|
||||
#include "cblas_test.h"
|
||||
|
||||
void F77_zgemv(int *order, char *transp, int *m, int *n,
|
||||
OPENBLAS_CONST void *alpha,
|
||||
CBLAS_TEST_ZOMPLEX *a, int *lda, OPENBLAS_CONST void *x, int *incx,
|
||||
OPENBLAS_CONST void *beta, void *y, int *incy) {
|
||||
const void *alpha,
|
||||
CBLAS_TEST_ZOMPLEX *a, int *lda, const void *x, int *incx,
|
||||
const void *beta, void *y, int *incy) {
|
||||
|
||||
CBLAS_TEST_ZOMPLEX *A;
|
||||
int i,j,LDA;
|
||||
|
||||
@@ -349,13 +349,13 @@
|
||||
CALL ZCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
|
||||
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
|
||||
$ 0 )
|
||||
$ 0 )
|
||||
END IF
|
||||
IF (RORDER) THEN
|
||||
CALL ZCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
|
||||
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
|
||||
$ 1 )
|
||||
$ 1 )
|
||||
END IF
|
||||
GO TO 200
|
||||
* Test ZGERC, 12, ZGERU, 13.
|
||||
|
||||
@@ -330,13 +330,13 @@
|
||||
CALL ZCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB,
|
||||
$ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C,
|
||||
$ 0 )
|
||||
$ 0 )
|
||||
END IF
|
||||
IF (RORDER) THEN
|
||||
CALL ZCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB,
|
||||
$ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C,
|
||||
$ 1 )
|
||||
$ 1 )
|
||||
END IF
|
||||
GO TO 190
|
||||
* Test ZHERK, 06, ZSYRK, 07.
|
||||
@@ -358,13 +358,13 @@
|
||||
CALL ZCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET,
|
||||
$ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
|
||||
$ 0 )
|
||||
$ 0 )
|
||||
END IF
|
||||
IF (RORDER) THEN
|
||||
CALL ZCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
|
||||
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET,
|
||||
$ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
|
||||
$ 1 )
|
||||
$ 1 )
|
||||
END IF
|
||||
GO TO 190
|
||||
*
|
||||
@@ -708,9 +708,9 @@
|
||||
9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
C 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
|
||||
C $ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3,
|
||||
C $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
|
||||
9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
|
||||
$ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3,
|
||||
$ ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
|
||||
9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -1034,9 +1034,9 @@ C $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
|
||||
9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
C 9995 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
|
||||
C $ ',', F4.1, '), C,', I3, ') .' )
|
||||
9995 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
|
||||
$ ',', F4.1, '), C,', I3, ') .' )
|
||||
9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -1386,9 +1386,9 @@ C $ ',', F4.1, '), C,', I3, ') .' )
|
||||
9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT(' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
C 9995 FORMAT(1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ',
|
||||
C $ ' .' )
|
||||
9995 FORMAT(1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ',
|
||||
$ ' .' )
|
||||
9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -1769,12 +1769,12 @@ C $ ' .' )
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
C 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ',
|
||||
C $ ' .' )
|
||||
C 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1,
|
||||
C $ '), C,', I3, ') .' )
|
||||
9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ',
|
||||
$ ' .' )
|
||||
9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1,
|
||||
$ '), C,', I3, ') .' )
|
||||
9992 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -2222,12 +2222,12 @@ C $ '), C,', I3, ') .' )
|
||||
$ 'ANGED INCORRECTLY *******' )
|
||||
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
|
||||
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
|
||||
C 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1,
|
||||
C $ ', C,', I3, ') .' )
|
||||
C 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
|
||||
C $ ',', F4.1, '), C,', I3, ') .' )
|
||||
9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1,
|
||||
$ ', C,', I3, ') .' )
|
||||
9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
|
||||
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
|
||||
$ ',', F4.1, '), C,', I3, ') .' )
|
||||
9992 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
|
||||
$ '******' )
|
||||
*
|
||||
@@ -2706,7 +2706,7 @@ C $ ',', F4.1, '), C,', I3, ') .' )
|
||||
50 CONTINUE
|
||||
END IF
|
||||
*
|
||||
C 60 CONTINUE
|
||||
60 CONTINUE
|
||||
LZERES = .TRUE.
|
||||
GO TO 80
|
||||
70 CONTINUE
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
|
||||
include_directories(${PROJECT_SOURCE_DIR})
|
||||
include_directories(${PROJECT_BINARY_DIR})
|
||||
|
||||
# sources that need to be compiled twice, once with no flags and once with LOWER
|
||||
set(UL_SOURCES
|
||||
@@ -73,7 +72,7 @@ GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2)
|
||||
# special defines for complex
|
||||
foreach (float_type ${FLOAT_TYPES})
|
||||
|
||||
if (USE_THREAD)
|
||||
if (SMP)
|
||||
GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false ${float_type})
|
||||
|
||||
@@ -107,7 +106,7 @@ foreach (float_type ${FLOAT_TYPES})
|
||||
GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type})
|
||||
endforeach()
|
||||
|
||||
if (USE_THREAD)
|
||||
if (SMP)
|
||||
|
||||
GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("gemv_thread.c" "CONJ;TRANSA" "gemv_thread_c" false "" "" false ${float_type})
|
||||
@@ -186,7 +185,7 @@ foreach (float_type ${FLOAT_TYPES})
|
||||
GenerateCombinationObjects("${l_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TU" false ${float_type})
|
||||
endforeach ()
|
||||
|
||||
if (USE_THREAD)
|
||||
if (SMP)
|
||||
GenerateNamedObjects("ger_thread.c" "" "" false "" "" false ${float_type})
|
||||
foreach(nu_smp_source ${NU_SMP_SOURCES})
|
||||
string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_source})
|
||||
@@ -197,7 +196,7 @@ foreach (float_type ${FLOAT_TYPES})
|
||||
endif ()
|
||||
endforeach ()
|
||||
|
||||
if (USE_THREAD)
|
||||
if (SMP)
|
||||
GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2)
|
||||
endif ()
|
||||
|
||||
|
||||
@@ -62,13 +62,13 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha,
|
||||
if (incy != 1) {
|
||||
Y = bufferY;
|
||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + M * sizeof(FLOAT) + 4095) & ~4095);
|
||||
// gemvbuffer = bufferX;
|
||||
gemvbuffer = bufferX;
|
||||
COPY_K(M, y, incy, Y, 1);
|
||||
}
|
||||
|
||||
if (incx != 1) {
|
||||
X = bufferX;
|
||||
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) + 4095) & ~4095);
|
||||
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) + 4095) & ~4095);
|
||||
COPY_K(N, x, incx, X, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -96,7 +96,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
COPY_K(args -> m, x, incx, buffer, 1);
|
||||
|
||||
x = buffer;
|
||||
// buffer += ((COMPSIZE * args -> m + 1023) & ~1023);
|
||||
buffer += ((COMPSIZE * args -> m + 1023) & ~1023);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -230,10 +230,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT
|
||||
|
||||
#ifndef TRANSA
|
||||
range_m[num_cpu] = num_cpu * ((m + 15) & ~15);
|
||||
if (range_m[num_cpu] > m * num_cpu) range_m[num_cpu] = m * num_cpu;
|
||||
#else
|
||||
range_m[num_cpu] = num_cpu * ((n + 15) & ~15);
|
||||
if (range_m[num_cpu] > n * num_cpu) range_m[num_cpu] = n * num_cpu;
|
||||
#endif
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
|
||||
@@ -62,36 +62,9 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef thread_local
|
||||
# if __STDC_VERSION__ >= 201112 && !defined __STDC_NO_THREADS__
|
||||
# define thread_local _Thread_local
|
||||
# elif defined _WIN32 && ( \
|
||||
defined _MSC_VER || \
|
||||
defined __ICL || \
|
||||
defined __DMC__ || \
|
||||
defined __BORLANDC__ )
|
||||
# define thread_local __declspec(thread)
|
||||
/* note that ICC (linux) and Clang are covered by __GNUC__ */
|
||||
# elif defined __GNUC__ || \
|
||||
defined __SUNPRO_C || \
|
||||
defined __xlC__
|
||||
# define thread_local __thread
|
||||
# else
|
||||
# define UNSAFE
|
||||
#endif
|
||||
#endif
|
||||
#if defined USE_OPENMP
|
||||
#undef UNSAFE
|
||||
#endif
|
||||
|
||||
#if !defined(TRANSA) && !defined(UNSAFE)
|
||||
#ifndef TRANSA
|
||||
#define Y_DUMMY_NUM 1024
|
||||
#if defined(USE_OPENMP)
|
||||
static FLOAT y_dummy[Y_DUMMY_NUM];
|
||||
#pragma omp threadprivate(y_dummy)
|
||||
# else
|
||||
static thread_local FLOAT y_dummy[Y_DUMMY_NUM];
|
||||
# endif
|
||||
#endif
|
||||
|
||||
static int gemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){
|
||||
@@ -132,12 +105,10 @@ static int gemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
#ifdef TRANSA
|
||||
y += n_from * incy * COMPSIZE;
|
||||
#else
|
||||
# ifndef UNSAFE
|
||||
//for split matrix row (n) direction and vector x of gemv_n
|
||||
x += n_from * incx * COMPSIZE;
|
||||
//store partial result for every thread
|
||||
y += (m_to - m_from) * 1 * COMPSIZE * pos;
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -165,7 +136,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
|
||||
|
||||
BLASLONG width, i, num_cpu;
|
||||
|
||||
#if !defined(TRANSA) && !defined(UNSAFE)
|
||||
#ifndef TRANSA
|
||||
int split_x=0;
|
||||
#endif
|
||||
|
||||
@@ -241,7 +212,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
|
||||
i -= width;
|
||||
}
|
||||
|
||||
#if !defined(TRANSA) && !defined(UNSAFE)
|
||||
#ifndef TRANSA
|
||||
//try to split matrix on row direction and x.
|
||||
//Then, reduction.
|
||||
if (num_cpu < nthreads) {
|
||||
@@ -301,7 +272,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
|
||||
exec_blas(num_cpu, queue);
|
||||
}
|
||||
|
||||
#if !defined(TRANSA) && !defined(UNSAFE)
|
||||
#ifndef TRANSA
|
||||
if(split_x==1){
|
||||
//reduction
|
||||
for(i=0; i<num_cpu; i++){
|
||||
|
||||
@@ -55,13 +55,13 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha,
|
||||
if (incy != 1) {
|
||||
Y = bufferY;
|
||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) + 4095) & ~4095);
|
||||
// sbmvbuffer = bufferX;
|
||||
sbmvbuffer = bufferX;
|
||||
COPY_K(n, y, incy, Y, 1);
|
||||
}
|
||||
|
||||
if (incx != 1) {
|
||||
X = bufferX;
|
||||
// sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) + 4095) & ~4095);
|
||||
sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) + 4095) & ~4095);
|
||||
COPY_K(n, x, incx, X, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -91,7 +91,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
COPY_K(n, x, incx, buffer, 1);
|
||||
|
||||
x = buffer;
|
||||
// buffer += ((COMPSIZE * n + 1023) & ~1023);
|
||||
buffer += ((COMPSIZE * n + 1023) & ~1023);
|
||||
}
|
||||
|
||||
SCAL_K(n, 0, 0, ZERO,
|
||||
@@ -246,7 +246,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
|
||||
|
||||
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
|
||||
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = sbmv_kernel;
|
||||
@@ -286,7 +285,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
|
||||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = sbmv_kernel;
|
||||
@@ -318,7 +316,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
|
||||
range_n[num_cpu] = num_cpu * ((n + 15) & ~15);
|
||||
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = sbmv_kernel;
|
||||
|
||||
@@ -53,13 +53,13 @@ int CNAME(BLASLONG m, FLOAT alpha, FLOAT *a,
|
||||
if (incy != 1) {
|
||||
Y = bufferY;
|
||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) + 4095) & ~4095);
|
||||
// gemvbuffer = bufferX;
|
||||
gemvbuffer = bufferX;
|
||||
COPY_K(m, y, incy, Y, 1);
|
||||
}
|
||||
|
||||
if (incx != 1) {
|
||||
X = bufferX;
|
||||
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) + 4095) & ~4095);
|
||||
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) + 4095) & ~4095);
|
||||
COPY_K(m, x, incx, X, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -246,7 +246,6 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
|
||||
|
||||
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = spmv_kernel;
|
||||
@@ -286,7 +285,6 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
|
||||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = spmv_kernel;
|
||||
|
||||
@@ -177,8 +177,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
|
||||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
|
||||
|
||||
|
||||
queue[MAX_CPU_NUMBER - num_cpu - 1].mode = mode;
|
||||
queue[MAX_CPU_NUMBER - num_cpu - 1].routine = symv_kernel;
|
||||
queue[MAX_CPU_NUMBER - num_cpu - 1].args = &args;
|
||||
@@ -226,7 +225,6 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
|
||||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = symv_kernel;
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
#include <ctype.h>
|
||||
#include "common.h"
|
||||
|
||||
// const static FLOAT dp1 = 1.;
|
||||
const static FLOAT dp1 = 1.;
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
#include <ctype.h>
|
||||
#include "common.h"
|
||||
|
||||
// const static FLOAT dp1 = 1.;
|
||||
const static FLOAT dp1 = 1.;
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
|
||||
@@ -107,7 +107,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
COPY_K(args -> n, x, incx, buffer, 1);
|
||||
|
||||
x = buffer;
|
||||
// buffer += ((args -> n * COMPSIZE + 1023) & ~1023);
|
||||
buffer += ((args -> n * COMPSIZE + 1023) & ~1023);
|
||||
}
|
||||
|
||||
if (range_n) y += *range_n * COMPSIZE;
|
||||
@@ -288,7 +288,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
|
||||
|
||||
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
|
||||
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = trmv_kernel;
|
||||
@@ -328,7 +327,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
|
||||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = trmv_kernel;
|
||||
@@ -358,7 +356,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
|
||||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = trmv_kernel;
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
#include <ctype.h>
|
||||
#include "common.h"
|
||||
|
||||
// const static FLOAT dp1 = 1.;
|
||||
const static FLOAT dp1 = 1.;
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
#include <ctype.h>
|
||||
#include "common.h"
|
||||
|
||||
// const static FLOAT dp1 = 1.;
|
||||
const static FLOAT dp1 = 1.;
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
#include <ctype.h>
|
||||
#include "common.h"
|
||||
|
||||
// const static FLOAT dp1 = 1.;
|
||||
const static FLOAT dp1 = 1.;
|
||||
|
||||
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
#include <ctype.h>
|
||||
#include "common.h"
|
||||
|
||||
// const static FLOAT dp1 = 1.;
|
||||
const static FLOAT dp1 = 1.;
|
||||
|
||||
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
|
||||
@@ -112,7 +112,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
#endif
|
||||
|
||||
x = buffer;
|
||||
// buffer += ((COMPSIZE * args -> m + 1023) & ~1023);
|
||||
buffer += ((COMPSIZE * args -> m + 1023) & ~1023);
|
||||
}
|
||||
|
||||
#ifndef TRANS
|
||||
@@ -234,7 +234,11 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifndef COMPLEX
|
||||
int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthreads){
|
||||
#else
|
||||
int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthreads){
|
||||
#endif
|
||||
|
||||
blas_arg_t args;
|
||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||
@@ -303,8 +307,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
|
||||
|
||||
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
|
||||
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = tpmv_kernel;
|
||||
queue[num_cpu].args = &args;
|
||||
@@ -343,7 +346,6 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
|
||||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = tpmv_kernel;
|
||||
|
||||
@@ -59,7 +59,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
|
||||
min_i = MIN(m - is, DTB_ENTRIES);
|
||||
|
||||
#ifndef TRANSA
|
||||
if (is > 0){
|
||||
if (is > 0){
|
||||
GEMV_N(is, min_i, 0, dp1,
|
||||
a + is * lda, lda,
|
||||
B + is, 1,
|
||||
|
||||
@@ -346,7 +346,6 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
|
||||
|
||||
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = trmv_kernel;
|
||||
@@ -386,7 +385,6 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
|
||||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = trmv_kernel;
|
||||
|
||||
@@ -83,13 +83,13 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA
|
||||
if (incy != 1) {
|
||||
Y = bufferY;
|
||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + M * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||
// gemvbuffer = bufferX;
|
||||
gemvbuffer = bufferX;
|
||||
COPY_K(M, y, incy, Y, 1);
|
||||
}
|
||||
|
||||
if (incx != 1) {
|
||||
X = bufferX;
|
||||
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||
COPY_K(N, x, incx, X, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -61,13 +61,13 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||
if (incy != 1) {
|
||||
Y = bufferY;
|
||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||
// sbmvbuffer = bufferX;
|
||||
sbmvbuffer = bufferX;
|
||||
COPY_K(n, y, incy, Y, 1);
|
||||
}
|
||||
|
||||
if (incx != 1) {
|
||||
X = bufferX;
|
||||
// sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||
sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||
COPY_K(n, x, incx, X, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -56,13 +56,13 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||
if (incy != 1) {
|
||||
Y = bufferY;
|
||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||
// gemvbuffer = bufferX;
|
||||
gemvbuffer = bufferX;
|
||||
COPY_K(m, y, incy, Y, 1);
|
||||
}
|
||||
|
||||
if (incx != 1) {
|
||||
X = bufferX;
|
||||
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||
COPY_K(m, x, incx, X, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -60,13 +60,13 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
|
||||
if (incy != 1) {
|
||||
Y = bufferY;
|
||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||
// sbmvbuffer = bufferX;
|
||||
sbmvbuffer = bufferX;
|
||||
COPY_K(n, y, incy, Y, 1);
|
||||
}
|
||||
|
||||
if (incx != 1) {
|
||||
X = bufferX;
|
||||
// sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||
sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
||||
COPY_K(n, x, incx, X, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -55,13 +55,13 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
|
||||
if (incy != 1) {
|
||||
Y = bufferY;
|
||||
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||
// gemvbuffer = bufferX;
|
||||
gemvbuffer = bufferX;
|
||||
COPY_K(m, y, incy, Y, 1);
|
||||
}
|
||||
|
||||
if (incx != 1) {
|
||||
X = bufferX;
|
||||
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
||||
COPY_K(m, x, incx, X, 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
#include <ctype.h>
|
||||
#include "common.h"
|
||||
|
||||
// const static FLOAT dp1 = 1.;
|
||||
const static FLOAT dp1 = 1.;
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
#include <ctype.h>
|
||||
#include "common.h"
|
||||
|
||||
// const static FLOAT dp1 = 1.;
|
||||
const static FLOAT dp1 = 1.;
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
#include <ctype.h>
|
||||
#include "common.h"
|
||||
|
||||
// const static FLOAT dp1 = 1.;
|
||||
const static FLOAT dp1 = 1.;
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
#include <ctype.h>
|
||||
#include "common.h"
|
||||
|
||||
// const static FLOAT dp1 = 1.;
|
||||
const static FLOAT dp1 = 1.;
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@
|
||||
#include <ctype.h>
|
||||
#include "common.h"
|
||||
|
||||
// const static FLOAT dm1 = -1.;
|
||||
const static FLOAT dm1 = -1.;
|
||||
|
||||
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user