Compare commits

..

2 Commits

Author SHA1 Message Date
Zhang Xianyi
5f998efd7b Merge pull request #1247 from martin-frbg/revert-cgroups-pr
Revert #1246, "honor cgroup/cpuset limits" for now
2017-07-25 16:09:55 +08:00
Martin Kroeker
88a35ff457 Revert #1246, "honor cgroup/cpuset limits" for now
Unsafe usage of the __GLIBC_PREREQ macro lead to build breakage on non-glibc systems
2017-07-25 08:39:35 +02:00
2690 changed files with 33868 additions and 93990 deletions

1
.gitignore vendored
View File

@@ -5,7 +5,6 @@
*.def
*.o
*.out
*.tmp
lapack-3.1.1
lapack-3.1.1.tgz
lapack-3.4.1

View File

@@ -1,210 +1,5 @@
# XXX: Precise is already deprecated, new default is Trusty.
# https://blog.travis-ci.com/2017-07-11-trusty-as-default-linux-is-coming
dist: precise
sudo: true
language: c
matrix:
include:
- &test-ubuntu
os: linux
compiler: gcc
addons:
apt:
packages:
- gfortran
before_script: &common-before
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
script:
- set -e
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE
env:
- TARGET_BOX=LINUX64
- BTYPE="BINARY=64"
- <<: *test-ubuntu
env:
- TARGET_BOX=LINUX64
- BTYPE="BINARY=64 USE_OPENMP=1"
- <<: *test-ubuntu
env:
- TARGET_BOX=LINUX64
- BTYPE="BINARY=64 INTERFACE64=1"
- <<: *test-ubuntu
compiler: clang
env:
- TARGET_BOX=LINUX64
- BTYPE="BINARY=64 CC=clang"
- <<: *test-ubuntu
compiler: clang
env:
- TARGET_BOX=LINUX64
- BTYPE="BINARY=64 INTERFACE64=1 CC=clang"
- <<: *test-ubuntu
addons:
apt:
packages:
- gcc-multilib
- gfortran-multilib
env:
- TARGET_BOX=LINUX32
- BTYPE="BINARY=32"
- os: linux
compiler: gcc
addons:
apt:
packages:
- binutils-mingw-w64-x86-64
- gcc-mingw-w64-x86-64
- gfortran-mingw-w64-x86-64
before_script: *common-before
script:
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
env:
- TARGET_BOX=WIN64
- BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
# Build & test on Alpine Linux inside chroot, i.e. on system with musl libc.
# These jobs needs sudo, so Travis runs them on VM-based infrastructure
# which is slower than container-based infrastructure used for jobs
# that don't require sudo.
- &test-alpine
os: linux
dist: trusty
sudo: true
language: minimal
before_install:
- "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.9.0/alpine-chroot-install' \
&& echo 'e5dfbbdc0c4b3363b99334510976c86bfa6cb251 alpine-chroot-install' | sha1sum -c || exit 1"
- alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
install:
- sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
before_script: *common-before
script:
- set -e
# XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
- alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
- alpine make -C test $COMMON_FLAGS $BTYPE
- alpine make -C ctest $COMMON_FLAGS $BTYPE
- alpine make -C utest $COMMON_FLAGS $BTYPE
env:
- TARGET_BOX=LINUX64_MUSL
- BTYPE="BINARY=64"
# XXX: This job segfaults in TESTS OF THE COMPLEX LEVEL 3 BLAS,
# but only on Travis CI, cannot reproduce it elsewhere.
#- &test-alpine-openmp
# <<: *test-alpine
# env:
# - TARGET_BOX=LINUX64_MUSL
# - BTYPE="BINARY=64 USE_OPENMP=1"
- <<: *test-alpine
env:
- TARGET_BOX=LINUX64_MUSL
- BTYPE="BINARY=64 INTERFACE64=1"
# Build with the same flags as Alpine do in OpenBLAS package.
- <<: *test-alpine
env:
- TARGET_BOX=LINUX64_MUSL
- BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=CORE2"
- &test-cmake
os: linux
compiler: clang
addons:
apt:
packages:
- gfortran
- cmake
dist: trusty
sudo: true
before_script:
- COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32"
script:
- set -e
- mkdir build
- CONFIG=Release
- cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG
- cmake --build build --config $CONFIG -- -j2
env:
- CMAKE=1
- <<: *test-cmake
env:
- CMAKE=1 CMAKE_ARGS="-DNOFORTRAN=1"
- <<: *test-cmake
compiler: gcc
env:
- CMAKE=1
- &test-macos
os: osx
osx_image: xcode8
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
- brew update
- brew install gcc # for gfortran
script:
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
env:
- BTYPE="BINARY=64 INTERFACE64=1"
- <<: *test-macos
env:
- BTYPE="BINARY=32"
- &emulated-arm
dist: trusty
sudo: required
services: docker
env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=gcc
name: "Emulated Build for ARMV6 with gcc"
before_install: sudo docker run --rm --privileged multiarch/qemu-user-static:register --reset
script: |
echo "FROM openblas/alpine:${IMAGE_ARCH}
COPY . /tmp/openblas
RUN mkdir /tmp/openblas/build && \
cd /tmp/openblas/build && \
CC=${COMPILER} cmake -D DYNAMIC_ARCH=OFF \
-D TARGET=${TARGET_ARCH} \
-D BUILD_SHARED_LIBS=ON \
-D BUILD_WITHOUT_LAPACK=ON \
-D BUILD_WITHOUT_CBLAS=ON \
-D CMAKE_BUILD_TYPE=Release ../ && \
cmake --build ." > Dockerfile
docker build .
- <<: *emulated-arm
env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=clang
name: "Emulated Build for ARMV6 with clang"
- <<: *emulated-arm
env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=gcc
name: "Emulated Build for ARMV8 with gcc"
- <<: *emulated-arm
env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=clang
name: "Emulated Build for ARMV8 with clang"
allow_failures:
- env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=gcc
- env: IMAGE_ARCH=arm32 TARGET_ARCH=ARMV6 COMPILER=clang
- env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=gcc
- env: IMAGE_ARCH=arm64 TARGET_ARCH=ARMV8 COMPILER=clang
# whitelist
branches:
only:
- master
- develop
notifications:
webhooks:
urls:
@@ -212,3 +7,32 @@ notifications:
on_success: change # options: [always|never|change] default: always
on_failure: always # options: [always|never|change] default: always
on_start: never # options: [always|never|change] default: always
compiler:
- gcc
env:
- TARGET_BOX=LINUX64 BTYPE="BINARY=64"
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 USE_OPENMP=1"
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 INTERFACE64=1"
- TARGET_BOX=LINUX32 BTYPE="BINARY=32"
- TARGET_BOX=WIN64 BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq gfortran
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
script:
- set -e
- make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
# whitelist
branches:
only:
- master
- develop

View File

@@ -3,39 +3,52 @@
##
cmake_minimum_required(VERSION 2.8.5)
project(OpenBLAS C ASM)
project(OpenBLAS)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 5)
set(OpenBLAS_MINOR_VERSION 2)
set(OpenBLAS_PATCH_VERSION 20)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
enable_language(ASM)
enable_language(C)
# Adhere to GNU filesystem layout conventions
include(GNUInstallDirs)
include(CMakePackageConfigHelpers)
if(MSVC)
set(OpenBLAS_LIBNAME libopenblas)
else()
set(OpenBLAS_LIBNAME openblas)
endif()
#######
if(MSVC)
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
endif()
option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS functions" OFF)
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64 only)" OFF)
option(DYNAMIC_OLDER "Include specific support for older cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF)
option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)" OFF)
# Add a prefix or suffix to all exported symbol names in the shared library.
# Avoids conflicts with other BLAS libraries, especially when using
# 64 bit integer interfaces in OpenBLAS.
set(SYMBOLPREFIX "" CACHE STRING "Add a prefix to all exported symbol names in the shared library to avoid conflicts with other BLAS libraries" )
set(SYMBOLSUFFIX "" CACHE STRING "Add a suffix to all exported symbol names in the shared library, e.g. _64 for INTERFACE64 builds" )
option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF)
option(BUILD_DEBUG "Build Debug Version" OFF)
#######
if(BUILD_WITHOUT_LAPACK)
set(NO_LAPACK 1)
set(NO_LAPACKE 1)
endif()
if(CMAKE_CONFIGURATION_TYPES) # multiconfig generator?
set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE)
set(CMAKE_BUILD_TYPE
Debug Debug
Release Release
)
else()
if( NOT CMAKE_BUILD_TYPE )
if(BUILD_DEBUG)
set(CMAKE_BUILD_TYPE Debug)
else()
set(CMAKE_BUILD_TYPE Release)
endif()
endif()
endif()
if(BUILD_WITHOUT_CBLAS)
set(NO_CBLAS 1)
endif()
@@ -43,13 +56,11 @@ endif()
#######
message(WARNING "CMake support is experimental. It does not yet support all build options and may not produce the same Makefiles that OpenBLAS ships with.")
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")
set(OpenBLAS_LIBNAME openblas${SUFFIX64_UNDERSCORE})
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
if (NOT DYNAMIC_ARCH)
@@ -63,9 +74,6 @@ endif ()
set(SUBDIRS ${BLASDIRS})
if (NOT NO_LAPACK)
list(APPEND SUBDIRS lapack)
if(BUILD_RELAPACK)
list(APPEND SUBDIRS relapack/src)
endif()
endif ()
# set which float types we want to build for
@@ -77,10 +85,6 @@ if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_
set(BUILD_COMPLEX16 true)
endif ()
if (NOT DEFINED BUILD_MATGEN)
set(BUILD_MATGEN true)
endif()
set(FLOAT_TYPES "")
if (BUILD_SINGLE)
message(STATUS "Building Single Precision")
@@ -102,10 +106,19 @@ if (BUILD_COMPLEX16)
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
endif ()
set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench)
# all :: libs netlib tests shared
# libs :
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
endif ()
if (${NO_STATIC} AND ${NO_SHARED})
message(FATAL_ERROR "Neither static nor shared are enabled.")
endif ()
#Set default output directory
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
@@ -127,210 +140,134 @@ endforeach ()
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
if (NOT NOFORTRAN AND NOT NO_LAPACK)
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
if (NOT NO_LAPACKE)
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
endif ()
if (NOT NO_LAPACKE)
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
endif ()
endif ()
# Only generate .def for dll on MSVC and always produce pdb files for debug and release
if(MSVC)
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4)
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /Zi")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
endif()
if (${DYNAMIC_ARCH})
add_subdirectory(kernel)
foreach(TARGET_CORE ${DYNAMIC_CORE})
message("${TARGET_CORE}")
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:kernel_${TARGET_CORE}>")
endforeach()
endif ()
# Only build shared libs for MSVC
if (MSVC)
set(BUILD_SHARED_LIBS ON)
endif()
# add objects to the openblas lib
add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${RELA_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include>)
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
# Android needs to explicitly link against libm
if(ANDROID)
target_link_libraries(${OpenBLAS_LIBNAME} m)
endif()
# Handle MSVC exports
if(MSVC AND BUILD_SHARED_LIBS)
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} LESS 3.4)
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
else()
# Creates verbose .def file (51KB vs 18KB)
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true)
endif()
endif()
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
# Set output for libopenblas
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES EXPORT_NAME "OpenBLAS")
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
endforeach()
enable_testing()
add_subdirectory(utest)
if (USE_THREAD)
# Add threading library to linker
find_package(Threads)
if (THREADS_HAVE_PTHREAD_ARG)
set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY COMPILE_OPTIONS "-pthread")
set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread")
endif()
target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT})
if (NOT MSVC)
#only build shared library for MSVC
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1)
if(SMP)
target_link_libraries(${OpenBLAS_LIBNAME} pthread)
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
endif()
if (MSVC OR NOT NOFORTRAN)
# Broken without fortran on unix
add_subdirectory(utest)
#build test and ctest
add_subdirectory(test)
if(NOT NO_CBLAS)
add_subdirectory(ctest)
endif()
endif()
if (NOT MSVC AND NOT NOFORTRAN)
# Build test and ctest
add_subdirectory(test)
if(NOT NO_CBLAS)
add_subdirectory(ctest)
endif()
endif()
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
SOVERSION ${OpenBLAS_MAJOR_VERSION}
)
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFIX} STREQUAL "")
if (NOT DEFINED ARCH)
set(ARCH_IN "x86_64")
else()
set(ARCH_IN ${ARCH})
endif()
if (${CORE} STREQUAL "generic")
set(ARCH_IN "GENERIC")
endif ()
if (NOT DEFINED EXPRECISION)
set(EXPRECISION_IN 0)
else()
set(EXPRECISION_IN ${EXPRECISION})
endif()
if (NOT DEFINED NO_CBLAS)
set(NO_CBLAS_IN 0)
else()
set(NO_CBLAS_IN ${NO_CBLAS})
endif()
if (NOT DEFINED NO_LAPACK)
set(NO_LAPACK_IN 0)
else()
set(NO_LAPACK_IN ${NO_LAPACK})
endif()
if (NOT DEFINED NO_LAPACKE)
set(NO_LAPACKE_IN 0)
else()
set(NO_LAPACKE_IN ${NO_LAPACKE})
endif()
if (NOT DEFINED NEED2UNDERSCORES)
set(NEED2UNDERSCORES_IN 0)
else()
set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES})
endif()
if (NOT DEFINED ONLY_CBLAS)
set(ONLY_CBLAS_IN 0)
else()
set(ONLY_CBLAS_IN ${ONLY_CBLAS})
endif()
if (NOT DEFINED BU)
set(BU _)
endif()
if (NOT ${SYMBOLPREFIX} STREQUAL "")
message(STATUS "adding prefix ${SYMBOLPREFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
endif()
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
endif()
add_custom_command(TARGET ${OpenBLAS_LIBNAME} POST_BUILD
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
COMMENT "renaming symbols"
)
endif()
# TODO: Why is the config saved here? Is this necessary with CMake?
#Save the config files for installation
# @cp Makefile.conf Makefile.conf_last
# @cp config.h config_last.h
#ifdef QUAD_PRECISION
# @echo "#define QUAD_PRECISION">> config_last.h
#endif
#ifeq ($(EXPRECISION), 1)
# @echo "#define EXPRECISION">> config_last.h
#endif
###
#ifeq ($(DYNAMIC_ARCH), 1)
# @$(MAKE) -C kernel commonlibs || exit 1
# @for d in $(DYNAMIC_CORE) ; \
# do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
# done
# @echo DYNAMIC_ARCH=1 >> Makefile.conf_last
#endif
#ifdef USE_THREAD
# @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
#endif
# @touch lib.grd
# Install project
# Install libraries
install(TARGETS ${OpenBLAS_LIBNAME}
EXPORT "OpenBLAS${SUFFIX64}Targets"
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
# Install headers
set(CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
set(CMAKE_INSTALL_FULL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
# Install include files
set (GENCONFIG_BIN ${CMAKE_BINARY_DIR}/gen_config_h${CMAKE_EXECUTABLE_SUFFIX})
ADD_CUSTOM_COMMAND(
OUTPUT ${CMAKE_BINARY_DIR}/openblas_config.h
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/config.h
COMMAND ${GENCONFIG_BIN} ${CMAKE_CURRENT_SOURCE_DIR}/config.h ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h > ${CMAKE_BINARY_DIR}/openblas_config.h
)
message(STATUS "Generating openblas_config.h in ${CMAKE_INSTALL_INCLUDEDIR}")
ADD_CUSTOM_TARGET(genconfig
ALL
DEPENDS openblas_config.h
)
add_dependencies(genconfig ${OpenBLAS_LIBNAME})
set(OPENBLAS_CONFIG_H ${CMAKE_BINARY_DIR}/openblas_config.h)
file(WRITE ${OPENBLAS_CONFIG_H} "#ifndef OPENBLAS_CONFIG_H\n")
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_CONFIG_H\n")
file(STRINGS ${PROJECT_BINARY_DIR}/config.h __lines)
foreach(line ${__lines})
string(REPLACE "#define " "" line ${line})
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_${line}\n")
endforeach()
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_VERSION \"OpenBLAS ${OpenBLAS_VERSION}\"\n")
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS)
file(APPEND ${OPENBLAS_CONFIG_H} "${OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS}\n")
file(APPEND ${OPENBLAS_CONFIG_H} "#endif /* OPENBLAS_CONFIG_H */\n")
install (FILES ${OPENBLAS_CONFIG_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
install (FILES ${CMAKE_BINARY_DIR}/openblas_config.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
if(NOT NOFORTRAN)
message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
set(F77BLAS_H ${CMAKE_BINARY_DIR}/f77blas.h)
file(WRITE ${F77BLAS_H} "#ifndef OPENBLAS_F77BLAS_H\n")
file(APPEND ${F77BLAS_H} "#define OPENBLAS_F77BLAS_H\n")
file(APPEND ${F77BLAS_H} "#include \"openblas_config.h\"\n")
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h COMMON_INTERFACE_H_CONTENTS)
file(APPEND ${F77BLAS_H} "${COMMON_INTERFACE_H_CONTENTS}\n")
file(APPEND ${F77BLAS_H} "#endif")
install (FILES ${F77BLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
ADD_CUSTOM_TARGET(genf77blas
ALL
COMMAND ${AWK} 'BEGIN{print \"\#ifndef OPENBLAS_F77BLAS_H\" \; print \"\#define OPENBLAS_F77BLAS_H\" \; print \"\#include \\"openblas_config.h\\" \"}; NF {print}; END{print \"\#endif\"}' ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h > ${CMAKE_BINARY_DIR}/f77blas.h
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/config.h
)
add_dependencies(genf77blas ${OpenBLAS_LIBNAME})
install (FILES ${CMAKE_BINARY_DIR}/f77blas.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
if(NOT NO_CBLAS)
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
file(WRITE ${CMAKE_BINARY_DIR}/cblas.tmp "${CBLAS_H_CONTENTS_NEW}")
install (FILES ${CMAKE_BINARY_DIR}/cblas.tmp DESTINATION ${CMAKE_INSTALL_INCLUDEDIR} RENAME cblas.h)
ADD_CUSTOM_TARGET(gencblas
ALL
COMMAND ${SED} 's/common/openblas_config/g' ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h > "${CMAKE_BINARY_DIR}/cblas.tmp"
COMMAND cp "${CMAKE_BINARY_DIR}/cblas.tmp" "${CMAKE_BINARY_DIR}/cblas.h"
DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h
)
add_dependencies(gencblas ${OpenBLAS_LIBNAME})
install (FILES ${CMAKE_BINARY_DIR}/cblas.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
if(NOT NO_LAPACKE)
@@ -338,35 +275,19 @@ if(NOT NO_LAPACKE)
add_dependencies( ${OpenBLAS_LIBNAME} genlapacke)
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/*.h")
install (FILES ${INCLUDE_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
ADD_CUSTOM_TARGET(genlapacke
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
COMMAND cp ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
)
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
if(NOT MSVC)
install (TARGETS ${OpenBLAS_LIBNAME}_static DESTINATION ${CMAKE_INSTALL_LIBDIR})
endif()
include(FindPkgConfig QUIET)
if(PKG_CONFIG_FOUND)
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas.pc @ONLY)
install (FILES ${PROJECT_BINARY_DIR}/openblas.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
endif()
# GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".
set(PN OpenBLAS)
set(CMAKECONFIG_INSTALL_DIR "share/cmake/${PN}${SUFFIX64}")
configure_package_config_file(cmake/${PN}Config.cmake.in
"${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake"
INSTALL_DESTINATION ${CMAKECONFIG_INSTALL_DIR})
write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
VERSION ${${PN}_VERSION}
COMPATIBILITY AnyNewerVersion)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
RENAME ${PN}${SUFFIX64}ConfigVersion.cmake
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
install(EXPORT "${PN}${SUFFIX64}Targets"
NAMESPACE "${PN}${SUFFIX64}::"
DESTINATION ${CMAKECONFIG_INSTALL_DIR})

View File

@@ -166,5 +166,5 @@ In chronological order:
* [2017-01-01] dgemm and dtrmm kernels for IBM z13
* [2017-02-26] ztrmm kernel for IBM z13
* [2017-03-13] strmm and ctrmm kernel for IBM z13
* [2017-09-01] initial Blas Level-1,2 (double precision) for IBM z13

View File

@@ -1,247 +1,4 @@
OpenBLAS ChangeLog
====================================================================
Version 0.3.5
31-Dec-2018
common:
* loop unrolling in TRMV has been enabled again.
* A domain error in the thread workload distribution for SYRK
has been fixed.
* gmake builds will now automatically add -fPIC to the build
options if the platform requires it.
* a pthreads key leakage (and associate crash on dlclose) in
the USE_TLS codepath was fixed.
* building of the utest cases on systems that do not provide
an implementation of complex.h was fixed.
x86_64:
* the SkylakeX code was changed to compile on OSX.
* unwanted application of the -march=skylake-avx512 option
to the common code parts of a DYNAMIC_ARCH build was fixed.
* improved performance of SGEMM for small workloads on Skylake X.
* performance of SGEMM and DGEMM was improved on Haswell.
ARMV8:
* a configuration error that broke the CNRM2 kernel was corrected.
* compilation of the GEMM kernels with CMAKE was fixed.
* DYNAMIC_ARCH builds are now available with CMAKE as well.
* using CMAKE for cross-compilation to the new cpu TARGETs
introduced in 0.3.4 now works.
POWER:
* a problem in cpu autodetection for AIX has been corrected.
====================================================================
Version 0.3.4
02-Dec-2018
common:
* the new, experimental thread-local memory allocation had
inadvertently been left enabled for gmake builds in 0.3.3
despite the announcement. It is now disabled by default, and
single-threaded builds will keep using the old allocator even
if the USE_TLS option is turned on.
* OpenBLAS will now provide enough buffer space for at least 50
threads by default.
* The output of openblas_get_config() now contains the version
number.
* A serious thread safety bug in GEMV operation with small M and
large N size has been fixed.
* The code will now automatically call blas_thread_init after a
fork if needed before handling a call to openblas_set_num_threads
* Accesses to parallelized level3 functions from multiple callers
are now serialized to avoid thread races (unless using OpenMP).
This should provide better performance than the known-threadsafe
(but non-default) USE_SIMPLE_THREADED_LEVEL3 option.
* When building LAPACK with gfortran, -frecursive is now (again)
enabled by default to ensure correct behaviour.
* The OpenBLAS version cblas.h now supports both CBLAS_ORDER and
CBLAS_LAYOUT as the name of the matrix row/column order option.
* Externally set LDFLAGS are now passed through to the final compile/link
steps to facilitate setting platform-specific linker flags.
* A potential race condition during the build of LAPACK (that would
usually manifest itself as a failure to build TESTING/MATGEN) has been
fixed.
* xHEMV has been changed to stay single-threaded for small input sizes
where the overhead of multithreading exceeds any possible gains
* CSWAP and ZSWAP have been limited to a single thread except on ARMV8 or
ThunderX hardware with sizable input.
* Linker flags for the PGI compiler have been updated
* Behaviour of AXPY with zero increments is now handled in the C interface,
correcting the result on at least Intel Atom.
* The result matrix from calling SGELSS with an all-zero input matrix is
now zeroed completely.
x86_64:
* Autodetection of AMD Ryzen2 has been fixed (again).
* CMAKE builds now support labeling of an INTERFACE64=1 build of
the library with the _64 suffix.
* AVX512 version of DGEMM has been added and the AVX512 SGEMM kernel
has been sped up by rewriting with C intrinsics
* Fixed compilation on RHEL5/CENTOS5 (issue with typename __WAIT_STATUS)
POWER:
* added support for building on AIX (with gcc and GNU tools from AIX Toolbox).
* CPU type detection has been implemented for AIX.
* CPU type detection has been fixed for NETBSD.
MIPS64:
* AXPY on LOONGSON3A has been corrected to pass "zero increment" utest.
* DSDOT on LOONGSON3A has been fixed.
* the SGEMM microkernel has been hardened against potential data loss.
ARMV8:
* DYNAMic_ARCH support is now available for 64bit ARM
* cross-compiling for ARMV8 under iOS now works.
* cpu-specific code has been rearranged to make better use of both
hardware commonalities and model-specific compiler optimizations.
* XGENE1 has been removed as a TARGET, superseded by the improved generic
ARMV8 support.
ARMV7:
* Older assembly mnemonics have been converted to UAL form to allow
building with clang 7.0
* Cross compiling LAPACKE for Android has been fixed again (broken by
update to LAPACK 3.7.0 some while ago).
====================================================================
Version 0.3.3
31-Aug-2018
common:
* thread memory allocation has been switched back to the method
used before version 0.3.1 due to unexpected problems caused by
the new code under some circumstances. A new compile-time option
USE_TLS has been added to enable the new code, and it is hoped
that this can become the default again in the next version.
* LAPAck PR272 has been integrated, which fixes spurious errors
in DSYEVR and related functions caused by missing conversion
from ILAENV to ILAENV_2STAGE in several _2stage routines.
* the cmake-generated OpenBLASConfig.cmake now uses correct case
for the name of the library
* added support for Haiku OS
x86_64:
* added AVX512 implementations of SDOT, DDOT, SAXPY, DAXPY,
DSCAL, DGEMVN and DSYMVL
* added a workaround for a cygwin issue that prevented compilation
of AVX512 code
IBM Z:
* added autodetection of Z14
* fixed TRMM errors in the generic target
====================================================================
Version 0.3.2
30-Jul-2018
common:
* fixes for regressions caused by the rewrite of the thread
initialization code in 0.3.1
POWER:
* fixed cpu autodetection for the BSDs
MIPS64:
* fixed utest errors in AXPY, DSDOT, ROT and SWAP
x86_64:
* added autodetection of AMD Ryzen 2
* fixed build with older versions of MSVC
====================================================================
Version 0.3.1
01-Jul-2018
common:
* rewritten thread initialization code with significantly reduced overhead
* added CBLAS interfaces to the IxAMIN BLAS extension functions
* fixed the lapack-test target
* CMAKE builds now create an OpenBLASConfig.cmake file
* ZAXPY now uses a single thread for small input sizes
* the LAPACK code was updated from Reference-LAPACK/lapack#253
(fixing LAPACKE interfaces to Aasen's functions)
POWER:
* corrected CROT and ZROT behaviour with zero INC_X
ARMV7:
* corrected xDOT behaviour with zero INC_X or INC_Y
x86_64:
* retired some older targets of DYNAMIC_ARCH builds to a new option DYNAMIC_OLDER,
this affects PENRYN,DUNNINGTON,OPTERON,OPTERON_SSE3,BOBCAT,ATOM and NANO
(which will still be supported via the slower PRESCOTT kernels when this option is not set)
* added an option DYNAMIC_LIST that (used in conjunction with DYNAMIC_ARCH) allows to
specify the list of x86_64 targets to include. Any target not on the list will be supported
by the Sandybridge or Nehalem kernels if available, or by Prescott.
* improved SWITCH_RATIO on Haswell for increased GEMM throughput
* added initial support for Intel Skylake X, including an AVX512 SGEMM kernel
* added autodetection of Intel Cannon Lake series as Skylake X
* added a default L2 cache size for hypervisors that return zero here (Chromebook)
* fixed a name clash with recent Windows10 headers that broke the build with (at least)
recent mingw from MSYS2
* fixed a link error in mixed clang/gfortran builds with OpenMP
* updated the OSX deployment target to 10.8
* switched on parallel make for builds on MS Windows by default
x86:
* fixed SSWAP and DSWAP behaviour with zero INC_X and INC_Y
====================================================================
Version 0.3.0
23-May-2108
common:
* fixed some more thread race and locking bugs
* added preliminary support for calling an OpenMP build of the library from multiple threads
* removed performance impact of thread locks added in 0.2.20 on OpenMP code
* general code cleanup
* optimized DSDOT implementation
* improved thread distribution for GEMM
* corrected IMATCOPY/OMATCOPY implementation
* fixed out-of-bounds accesses in the multithreaded xBMV/xPMV and SYMV implementations
* cmake build improvements
* pkgconfig file now contains build options
* openblas_get_config() now reports USE_OPENMP and NUM_THREADS settings used for the build
* corrections and improvements for systems with more than 64 cpus
* LAPACK code updated to 3.8.0 including later fixes
* added ReLAPACK, a recursive implementation of several LAPACK functions
* Rewrote ROTMG to handle cases that the netlib code failed to address
* Disabled (broken) multithreading code for xTRMV
* corrected prototypes of complex CBLAS functions to make our cblas.h match the generally accepted standard
* shared memory access failures on startup are now handled more gracefully
* restored utests from earlier releases (and made them pass on all affected systems)
SPARC:
* several fixes for cpu autodetection
POWER:
* corrected vector register overwriting in several Power8 kernels
* optimized additional BLAS functions
ARM:
* added support for CortexA53 and A72
* added autodetection for ThunderX2T99
* made most optimized kernels the default for generic ARMv8 targets
x86_64:
* parallelized DDOT kernel for Haswell
* changed alignment directives in assembly kernels to boost performance on OSX
* fixed register handling in the GEMV microkernels (bug exposed by gcc7)
* added support for building on OpenBSD and Dragonfly
* updated compiler options to work with Intel release 2018
* support fully optimized build with clang/flang on Microsoft Windows
* fixed building on AIX
IBM Z:
* added optimized BLAS 1/2 functions
MIPS:
* fixed cpu autodetection helper code
* added mips32 1004K cpu (Mediatek MT7621 and similar SoC)
* added mips64 I6500 cpu
====================================================================
Version 0.2.20
24-Jul-2017

View File

@@ -21,17 +21,6 @@ ifeq ($(BUILD_RELAPACK), 1)
RELA = re_lapack
endif
ifeq ($(NO_FORTRAN), 1)
define NOFORTRAN
1
endef
define NO_LAPACK
1
endef
export NOFORTRAN
export NO_LAPACK
endif
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
@@ -58,7 +47,7 @@ endif
endif
@echo " C compiler ... $(C_COMPILER) (command line : $(CC))"
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
ifndef NOFORTRAN
@echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))"
endif
ifneq ($(OSNAME), AIX)
@@ -97,12 +86,16 @@ endif
shared :
ifndef NO_SHARED
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android))
@$(MAKE) -C exports so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
ifeq ($(OSNAME), FreeBSD)
@$(MAKE) -C exports so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), NetBSD)
@$(MAKE) -C exports so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif
@@ -119,7 +112,7 @@ endif
endif
tests :
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
ifndef NOFORTRAN
touch $(LIBNAME)
ifndef NO_FBLAS
$(MAKE) -C test all
@@ -131,7 +124,7 @@ endif
endif
libs :
ifeq ($(CORE), UNKNOWN)
ifeq ($(CORE), UNKOWN)
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
endif
ifeq ($(NOFORTRAN), 1)
@@ -164,9 +157,6 @@ ifeq ($(DYNAMIC_ARCH), 1)
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
done
@echo DYNAMIC_ARCH=1 >> Makefile.conf_last
ifeq ($(DYNAMIC_OLDER), 1)
@echo DYNAMIC_OLDER=1 >> Makefile.conf_last
endif
endif
ifdef USE_THREAD
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
@@ -221,7 +211,7 @@ netlib :
else
netlib : lapack_prebuild
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
ifndef NOFORTRAN
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
endif
@@ -242,7 +232,7 @@ prof_lapack : lapack_prebuild
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof
lapack_prebuild :
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
ifndef NOFORTRAN
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
@@ -251,8 +241,8 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "override ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
@@ -267,8 +257,6 @@ ifeq ($(F_COMPILER), GFORTRAN)
ifdef SMP
ifeq ($(OSNAME), WINNT)
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
else ifeq ($(OSNAME), Haiku)
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
@@ -282,26 +270,25 @@ endif
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
endif
large.tgz :
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
ifndef NOFORTRAN
if [ ! -a $< ]; then
-wget http://www.netlib.org/lapack/timing/large.tgz;
fi
endif
timing.tgz :
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
ifndef NOFORTRAN
if [ ! -a $< ]; then
-wget http://www.netlib.org/lapack/timing/timing.tgz;
fi
endif
lapack-timing : large.tgz timing.tgz
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
ifndef NOFORTRAN
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TIMING
@@ -310,10 +297,9 @@ endif
lapack-test :
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
ifneq ($(CROSS), 1)
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
endif
@@ -325,9 +311,9 @@ lapack-runtest:
blas-test:
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out)
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out)
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out)
dummy :

View File

@@ -4,37 +4,22 @@ CCOMMON_OPT += -march=armv8-a
FCOMMON_OPT += -march=armv8-a
endif
ifeq ($(CORE), CORTEXA53)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
endif
ifeq ($(CORE), CORTEXA57)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
endif
ifeq ($(CORE), CORTEXA72)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
ifeq ($(CORE), CORTEXA73)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
ifeq ($(CORE), VULCAN)
CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan
endif
ifeq ($(CORE), THUNDERX)
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
endif
ifeq ($(CORE), FALKOR)
CCOMMON_OPT += -march=armv8-a -mtune=falkor
FCOMMON_OPT += -march=armv8-a -mtune=falkor
CCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
FCOMMON_OPT += -mtune=thunderx -mcpu=thunderx
endif
ifeq ($(CORE), THUNDERX2T99)
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
CCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99
FCOMMON_OPT += -mtune=thunderx2t99 -mcpu=thunderx2t99
endif

View File

@@ -48,7 +48,6 @@ ifndef NO_CBLAS
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
endif
ifneq ($(OSNAME), AIX)
ifndef NO_LAPACKE
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
@@ -67,14 +66,18 @@ endif
#for install shared library
ifndef NO_SHARED
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku))
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android))
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD OpenBSD NetBSD DragonFly))
ifeq ($(OSNAME), FreeBSD)
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), NetBSD)
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so
@@ -87,50 +90,21 @@ ifeq ($(OSNAME), Darwin)
endif
ifeq ($(OSNAME), WINNT)
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
@-cp $(IMPLIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@-cp $(LIBDLLNAME).a "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
endif
ifeq ($(OSNAME), CYGWIN_NT)
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
@-cp $(IMPLIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
endif
endif
else
#install on AIX has different options syntax
ifndef NO_LAPACKE
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
endif
#for install static library
ifndef NO_STATIC
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif
#for install shared library
ifndef NO_SHARED
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
endif
#Generating openblas.pc
@echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'extralib='$(EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo Generating openblas.pc in $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
@echo 'version='$(VERSION) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
@echo 'extralib='$(EXTRALIB) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
@cat openblas.pc.in >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc
#Generating OpenBLASConfig.cmake
@@ -140,7 +114,7 @@ endif
ifndef NO_SHARED
#ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))

View File

@@ -17,10 +17,6 @@ ifdef CPUIDEMU
EXFLAGS = -DCPUIDEMU -DVENDOR=99
endif
ifeq ($(TARGET), 1004K)
TARGET_FLAGS = -mips32r2
endif
ifeq ($(TARGET), P5600)
TARGET_FLAGS = -mips32r5
endif
@@ -33,10 +29,6 @@ ifeq ($(TARGET), P6600)
TARGET_FLAGS = -mips64r6
endif
ifeq ($(TARGET), I6500)
TARGET_FLAGS = -mips64r6
endif
all: getarch_2nd
./getarch_2nd 0 >> $(TARGET_MAKE)
./getarch_2nd 1 >> $(TARGET_CONF)

View File

@@ -3,7 +3,7 @@
#
# This library's version
VERSION = 0.3.5
VERSION = 0.2.20
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@@ -17,11 +17,6 @@ VERSION = 0.3.5
# If you want to support multiple architecture in one binary
# DYNAMIC_ARCH = 1
# If you want the full list of x86_64 architectures supported in DYNAMIC_ARCH
# mode (including individual optimizied codes for PENRYN, DUNNINGTON, OPTERON,
# OPTERON_SSE3, ATOM and NANO rather than fallbacks to older architectures)
# DYNAMIC_OLDER = 1
# C compiler including binary type(32bit / 64bit). Default is gcc.
# Don't use Intel Compiler or PGI, it won't generate right codes as I expect.
# CC = gcc
@@ -60,26 +55,11 @@ VERSION = 0.3.5
# This flag is always set for POWER8. Don't modify the flag
# USE_OPENMP = 1
# The OpenMP scheduler to use - by default this is "static" and you
# will normally not want to change this unless you know that your main
# workload will involve tasks that have highly unbalanced running times
# for individual threads. Changing away from "static" may also adversely
# affect memory access locality in NUMA systems. Setting to "runtime" will
# allow you to select the scheduler from the environment variable OMP_SCHEDULE
# CCOMMON_OPT += -DOMP_SCHED=dynamic
# You can define maximum number of threads. Basically it should be
# less than actual number of cores. If you don't specify one, it's
# automatically detected by the the script.
# NUM_THREADS = 24
# If you have enabled USE_OPENMP and your application would call
# OpenBLAS's calculation API from multi threads, please comment it in.
# This flag defines how many instances of OpenBLAS's calculation API can
# actually run in parallel. If more threads call OpenBLAS's calculation API,
# they need to wait for the preceding API calls to finish or risk data corruption.
# NUM_PARALLEL = 2
# if you don't need to install the static library, please comment it in.
# NO_STATIC = 1
@@ -109,12 +89,6 @@ BUILD_LAPACK_DEPRECATED = 1
# If you want to use legacy threaded Level 3 implementation.
# USE_SIMPLE_THREADED_LEVEL3 = 1
# If you want to use the new, still somewhat experimental code that uses
# thread-local storage instead of a central memory buffer in memory.c
# Note that if your system uses GLIBC, it needs to have at least glibc 2.21
# for this to work.
# USE_TLS = 1
# If you want to drive whole 64bit region by BLAS. Not all Fortran
# compiler supports this. It's safe to keep comment it out if you
# are not sure(equivalent to "-i8" option).
@@ -126,7 +100,7 @@ BUILD_LAPACK_DEPRECATED = 1
NO_WARMUP = 1
# If you want to disable CPU/Memory affinity on Linux.
NO_AFFINITY = 1
#NO_AFFINITY = 1
# if you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
# BIGNUMA = 1
@@ -152,9 +126,6 @@ NO_AFFINITY = 1
# FUNCTION_PROFILE = 1
# Support for IEEE quad precision(it's *real* REAL*16)( under testing)
# This option should not be used - it is a holdover from unfinished code present
# in the original GotoBLAS2 library that may be usable as a starting point but
# is not even expected to compile in its present form.
# QUAD_PRECISION = 1
# Theads are still working for a while after finishing BLAS operation
@@ -173,11 +144,8 @@ NO_AFFINITY = 1
# CONSISTENT_FPCSR = 1
# If any gemm arguement m, n or k is less or equal this threshold, gemm will be execute
# with single thread. (Actually in recent versions this is a factor proportional to the
# number of floating point operations necessary for the given problem size, no longer
# an individual dimension). You can use this setting to avoid the overhead of multi-
# threading in small matrix sizes. The default value is 4, but values as high as 50 have
# been reported to be optimal for certain workloads (50 is the recommended value for Julia).
# with single thread. You can use this flag to avoid the overhead of multi-threading
# in small matrix sizes. The default value is 4.
# GEMM_MULTITHREAD_THRESHOLD = 4
# If you need santy check by comparing reference BLAS. It'll be very
@@ -192,8 +160,8 @@ NO_AFFINITY = 1
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT
# COMMON_OPT = -O2
# gfortran option for LAPACK to improve thread-safety
# It is enabled by default in Makefile.system for gfortran
# gfortran option for LAPACK
# enable this flag only on 64bit Linux and if you need a thread safe lapack library
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT
# FCOMMON_OPT = -frecursive

View File

@@ -9,17 +9,6 @@ ifndef TOPDIR
TOPDIR = .
endif
# Catch conflicting usage of ARCH in some BSD environments
ifeq ($(ARCH), amd64)
override ARCH=x86_64
else ifeq ($(ARCH), powerpc64)
override ARCH=power
else ifeq ($(ARCH), i386)
override ARCH=x86
else ifeq ($(ARCH), aarch64)
override ARCH=arm64
endif
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
# Default C compiler
@@ -28,24 +17,15 @@ NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
# http://stackoverflow.com/questions/4029274/mingw-and-make-variables
# - Default value is 'cc' which is not always a valid command (e.g. MinGW).
ifeq ($(origin CC),default)
# Check if $(CC) refers to a valid command and set the value to gcc if not
ifneq ($(findstring cmd.exe,$(SHELL)),)
ifeq ($(shell where $(CC) 2>NUL),)
CC = gcc
# Change the default compile to clang on Mac OSX.
# http://stackoverflow.com/questions/714100/os-detecting-makefile
UNAME_S := $(shell uname -s)
ifeq ($(UNAME_S),Darwin)
CC = clang
# EXTRALIB += -Wl,-no_compact_unwind
endif
endif
else # POSIX-ish
ifeq ($(shell command -v $(CC) 2>/dev/null),)
ifeq ($(shell uname -s),Darwin)
CC = clang
# EXTRALIB += -Wl,-no_compact_unwind
else
CC = gcc
endif # Darwin
endif # CC exists
endif # Shell is sane
endif # CC is set to default
# Default Fortran compiler (FC) is selected by f_check.
@@ -73,9 +53,6 @@ ifeq ($(BINARY), 32)
ifeq ($(TARGET), HASWELL)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET), SKYLAKEX)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
@@ -109,9 +86,6 @@ ifeq ($(BINARY), 32)
ifeq ($(TARGET_CORE), HASWELL)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET_CORE), SKYLAKEX)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET_CORE), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
@@ -158,10 +132,6 @@ ifeq ($(NO_AVX2), 1)
GETARCH_FLAGS += -DNO_AVX2
endif
ifeq ($(NO_AVX512), 1)
GETARCH_FLAGS += -DNO_AVX512
endif
ifeq ($(DEBUG), 1)
GETARCH_FLAGS += -g
endif
@@ -205,17 +175,12 @@ endif
endif
ifndef NUM_PARALLEL
NUM_PARALLEL = 1
endif
ifndef NUM_THREADS
NUM_THREADS = $(NUM_CORES)
endif
ifeq ($(NUM_THREADS), 1)
override USE_THREAD = 0
override USE_OPENMP = 0
endif
ifdef USE_THREAD
@@ -259,12 +224,12 @@ endif
ifeq ($(OSNAME), Darwin)
ifndef MACOSX_DEPLOYMENT_TARGET
export MACOSX_DEPLOYMENT_TARGET=10.8
export MACOSX_DEPLOYMENT_TARGET=10.6
endif
MD5SUM = md5 -r
endif
ifneq (,$(findstring $(OSNAME), FreeBSD OpenBSD DragonFly))
ifeq ($(OSNAME), FreeBSD)
MD5SUM = md5 -r
endif
@@ -338,7 +303,6 @@ endif
ifeq ($(OSNAME), CYGWIN_NT)
NEED_PIC = 0
NO_EXPRECISION = 1
OS_CYGWIN_NT = 1
endif
ifneq ($(OSNAME), WINNT)
@@ -458,7 +422,7 @@ CCOMMON_OPT += -fopenmp
endif
ifeq ($(C_COMPILER), INTEL)
CCOMMON_OPT += -fopenmp
CCOMMON_OPT += -openmp
endif
ifeq ($(C_COMPILER), PGI)
@@ -483,44 +447,13 @@ DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
endif
ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2
ifeq ($(DYNAMIC_OLDER), 1)
DYNAMIC_CORE += PENRYN DUNNINGTON
endif
DYNAMIC_CORE += NEHALEM
ifeq ($(DYNAMIC_OLDER), 1)
DYNAMIC_CORE += OPTERON OPTERON_SSE3
endif
DYNAMIC_CORE += BARCELONA
ifeq ($(DYNAMIC_OLDER), 1)
DYNAMIC_CORE += BOBCAT ATOM NANO
endif
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
endif
ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += HASWELL ZEN
endif
ifneq ($(NO_AVX512), 1)
ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += SKYLAKEX
endif
endif
endif
ifdef DYNAMIC_LIST
override DYNAMIC_CORE = PRESCOTT $(DYNAMIC_LIST)
XCCOMMON_OPT = -DDYNAMIC_LIST -DDYN_PRESCOTT
XCCOMMON_OPT += $(foreach dcore,$(DYNAMIC_LIST),-DDYN_$(dcore))
CCOMMON_OPT += $(XCCOMMON_OPT)
#CCOMMON_OPT += -DDYNAMIC_LIST='$(DYNAMIC_LIST)'
endif
ifeq ($(ARCH), arm64)
DYNAMIC_CORE = ARMV8
DYNAMIC_CORE += CORTEXA57
DYNAMIC_CORE += THUNDERX
DYNAMIC_CORE += THUNDERX2T99
endif
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
@@ -620,14 +553,9 @@ CCOMMON_OPT += -march=mips64
FCOMMON_OPT += -march=mips64
endif
ifeq ($(CORE), 1004K)
CCOMMON_OPT += -mips32r2 $(MSA_FLAGS)
FCOMMON_OPT += -mips32r2 $(MSA_FLAGS)
endif
ifeq ($(CORE), P5600)
CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS)
endif
ifeq ($(CORE), I6400)
@@ -640,11 +568,6 @@ CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS)
endif
ifeq ($(CORE), I6500)
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS)
endif
ifeq ($(OSNAME), AIX)
BINARY_DEFINED = 1
endif
@@ -652,14 +575,12 @@ endif
endif
ifndef BINARY_DEFINED
ifneq ($(OSNAME), AIX)
ifdef BINARY64
CCOMMON_OPT += -m64
else
CCOMMON_OPT += -m32
endif
endif
endif
endif
@@ -704,7 +625,6 @@ ifeq ($(F_COMPILER), G77)
CCOMMON_OPT += -DF_INTERFACE_G77
FCOMMON_OPT += -Wall
ifndef NO_BINARY_MODE
ifneq ($(OSNAME), AIX)
ifdef BINARY64
FCOMMON_OPT += -m64
else
@@ -712,12 +632,10 @@ FCOMMON_OPT += -m32
endif
endif
endif
endif
ifeq ($(F_COMPILER), G95)
CCOMMON_OPT += -DF_INTERFACE_G95
FCOMMON_OPT += -Wall
ifneq ($(OSNAME), AIX)
ifndef NO_BINARY_MODE
ifdef BINARY64
FCOMMON_OPT += -m64
@@ -726,13 +644,10 @@ FCOMMON_OPT += -m32
endif
endif
endif
endif
ifeq ($(F_COMPILER), GFORTRAN)
CCOMMON_OPT += -DF_INTERFACE_GFORT
FCOMMON_OPT += -Wall
# make single-threaded LAPACK calls thread-safe #1847
FCOMMON_OPT += -frecursive
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
ifneq ($(NO_LAPACK), 1)
EXTRALIB += -lgfortran
@@ -749,20 +664,16 @@ FCOMMON_OPT += -mabi=32
endif
else
ifdef BINARY64
ifneq ($(OSNAME), AIX)
FCOMMON_OPT += -m64
endif
ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -fdefault-integer-8
endif
endif
else
ifneq ($(OSNAME), AIX)
FCOMMON_OPT += -m32
endif
endif
endif
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -fopenmp
endif
@@ -776,7 +687,7 @@ FCOMMON_OPT += -i8
endif
endif
ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -fopenmp
FCOMMON_OPT += -openmp
endif
endif
@@ -956,10 +867,6 @@ ifeq ($(DYNAMIC_ARCH), 1)
CCOMMON_OPT += -DDYNAMIC_ARCH
endif
ifeq ($(DYNAMIC_OLDER), 1)
CCOMMON_OPT += -DDYNAMIC_OLDER
endif
ifeq ($(NO_LAPACK), 1)
CCOMMON_OPT += -DNO_LAPACK
#Disable LAPACK C interface
@@ -982,10 +889,6 @@ ifeq ($(NO_AVX2), 1)
CCOMMON_OPT += -DNO_AVX2
endif
ifeq ($(NO_AVX512), 1)
CCOMMON_OPT += -DNO_AVX512
endif
ifdef SMP
CCOMMON_OPT += -DSMP_SERVER
@@ -1032,18 +935,10 @@ endif
CCOMMON_OPT += -DMAX_CPU_NUMBER=$(NUM_THREADS)
CCOMMON_OPT += -DMAX_PARALLEL_NUMBER=$(NUM_PARALLEL)
ifdef USE_SIMPLE_THREADED_LEVEL3
CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3
endif
ifdef USE_TLS
CCOMMON_OPT += -DUSE_TLS
endif
CCOMMON_OPT += -DVERSION=\"$(VERSION)\"
ifndef SYMBOLPREFIX
SYMBOLPREFIX =
endif
@@ -1053,15 +948,9 @@ SYMBOLSUFFIX =
endif
ifndef LIBNAMESUFFIX
LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
else
LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
endif
ifeq ($(OSNAME), CYGWIN_NT)
LIBPREFIX = cyg$(LIBNAMEBASE)
else
LIBPREFIX = lib$(LIBNAMEBASE)
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
endif
KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
@@ -1154,6 +1043,8 @@ ifndef FCOMMON_OPT
FCOMMON_OPT = -O2 -frecursive
endif
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
@@ -1161,12 +1052,6 @@ override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
#MAKEOVERRIDES =
ifdef NEED_PIC
ifeq (,$(findstring PIC,$(FFLAGS)))
override FFLAGS += -fPIC
endif
endif
#For LAPACK Fortran codes.
#Disable -fopenmp for LAPACK Fortran codes on Windows.
ifdef OS_WINDOWS
@@ -1224,12 +1109,7 @@ endif
LIBDLLNAME = $(LIBPREFIX).dll
IMPLIBNAME = lib$(LIBNAMEBASE).dll.a
ifneq ($(OSNAME), AIX)
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
else
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.a)
endif
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)
@@ -1306,7 +1186,6 @@ export MSA_FLAGS
export KERNELDIR
export FUNCTION_PROFILE
export TARGET_CORE
export NO_AVX512
export SGEMM_UNROLL_M
export SGEMM_UNROLL_N

View File

@@ -8,34 +8,6 @@ endif
endif
endif
ifeq ($(CORE), SKYLAKEX)
ifndef DYNAMIC_ARCH
ifndef NO_AVX512
CCOMMON_OPT += -march=skylake-avx512
FCOMMON_OPT += -march=skylake-avx512
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
endif
ifeq ($(CORE), HASWELL)
ifndef DYNAMIC_ARCH
ifndef NO_AVX2
CCOMMON_OPT += -mavx2
FCOMMON_OPT += -mavx2
endif
endif
endif
ifeq ($(OSNAME), Interix)
ARFLAGS = -m x64
endif

237
README.md
View File

@@ -2,224 +2,175 @@
[![Join the chat at https://gitter.im/xianyi/OpenBLAS](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
Travis CI: [![Build Status](https://travis-ci.org/xianyi/OpenBLAS.svg?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS)
Travis CI: [![Build Status](https://travis-ci.org/xianyi/OpenBLAS.png?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS)
AppVeyor: [![Build status](https://ci.appveyor.com/api/projects/status/09sohd35n8nkkx64/branch/develop?svg=true)](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
## Introduction
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
Please read the documentation on the OpenBLAS wiki pages: <http://github.com/xianyi/OpenBLAS/wiki>.
Please read the documents on OpenBLAS wiki pages <http://github.com/xianyi/OpenBLAS/wiki>.
## Binary Packages
We provide official binary packages for the following platform:
We provide binary packages for the following platform.
* Windows x86/x86_64
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/).
## Installation from Source
Download from project homepage. http://xianyi.github.com/OpenBLAS/
Download from project homepage, http://xianyi.github.com/OpenBLAS/, or check out the code
using Git from https://github.com/xianyi/OpenBLAS.git.
### Dependencies
Building OpenBLAS requires the following to be installed:
* GNU Make
* A C compiler, e.g. GCC or Clang
* A Fortran compiler (optional, for LAPACK)
* IBM MASS (optional, see below)
Or, check out codes from git://github.com/xianyi/OpenBLAS.git
### Normal compile
Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically.
To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`.
The full target list is in the file `TargetList.txt`.
* type "make" to detect the CPU automatically.
or
* type "make TARGET=xxx" to set target CPU, e.g. "make TARGET=NEHALEM". The full target list is in file TargetList.txt.
### Cross compile
Set `CC` and `FC` to point to the cross toolchains, and set `HOSTCC` to your host C compiler.
The target must be specified explicitly when cross compiling.
Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly.
Examples:
* On an x86 box, compile this library for a loongson3a CPU:
```sh
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
```
On X86 box, compile this library for loongson3a CPU.
* On an x86 box, compile this library for a loongson3a CPU with loongcc (based on Open64) compiler:
```sh
make CC=loongcc FC=loongf95 HOSTCC=gcc TARGET=LOONGSON3A CROSS=1 CROSS_SUFFIX=mips64el-st-linux-gnu- NO_LAPACKE=1 NO_SHARED=1 BINARY=32
```
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
On X86 box, compile this library for loongson3a CPU with loongcc (based on Open64) compiler.
make CC=loongcc FC=loongf95 HOSTCC=gcc TARGET=LOONGSON3A CROSS=1 CROSS_SUFFIX=mips64el-st-linux-gnu- NO_LAPACKE=1 NO_SHARED=1 BINARY=32
### Debug version
A debug version can be built using `make DEBUG=1`.
make DEBUG=1
### Compile with MASS support on Power CPU (optional)
### Compile with MASS Support on Power CPU (Optional dependency)
The [IBM MASS](http://www-01.ibm.com/software/awdtools/mass/linux/mass-linux.html) library
consists of a set of mathematical functions for C, C++, and Fortran applications that are
are tuned for optimum performance on POWER architectures.
OpenBLAS with MASS requires a 64-bit, little-endian OS on POWER.
The library can be installed as shown:
[IBM MASS](http://www-01.ibm.com/software/awdtools/mass/linux/mass-linux.html) library consists of a set of mathematical functions for C, C++, and
Fortran-language applications that are tuned for optimum performance on POWER architectures. OpenBLAS with MASS requires 64-bit, little-endian OS on POWER.
The library can be installed as below -
* On Ubuntu:
```sh
wget -q http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add -
echo "deb http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ trusty main" | sudo tee /etc/apt/sources.list.d/ibm-xl-compiler-eval.list
sudo apt-get update
sudo apt-get install libxlmass-devel.8.1.5
```
* On Ubuntu:
* On RHEL/CentOS:
```sh
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/repodata/repomd.xml.key
sudo rpm --import repomd.xml.key
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/ibm-xl-compiler-eval.repo
sudo cp ibm-xl-compiler-eval.repo /etc/yum.repos.d/
sudo yum install libxlmass-devel.8.1.5
```
wget -q http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add -</br>
echo "deb http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ trusty main" | sudo tee /etc/apt/sources.list.d/ibm-xl-compiler-eval.list</br>
sudo apt-get update</br>
sudo apt-get install libxlmass-devel.8.1.5</br>
After installing the MASS library, compile OpenBLAS with `USE_MASS=1`.
For example, to compile on Power8 with MASS support: `make USE_MASS=1 TARGET=POWER8`.
* On RHEL/CentOS:
### Install to a specific directory (optional)
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/repodata/repomd.xml.key</br>
sudo rpm --import repomd.xml.key</br>
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/ibm-xl-compiler-eval.repo</br>
sudo cp ibm-xl-compiler-eval.repo /etc/yum.repos.d/</br>
sudo yum install libxlmass-devel.8.1.5</br>
Use `PREFIX=` when invoking `make`, for example
After installing MASS library, compile openblas with USE_MASS=1.
```sh
make install PREFIX=your_installation_directory
```
Example:
The default installation directory is `/opt/OpenBLAS`.
Compiling on Power8 with MASS support -
## Supported CPUs and Operating Systems
make USE_MASS=1 TARGET=POWER8
Please read `GotoBLAS_01Readme.txt`.
### Install to the directory (optional)
### Additional supported CPUs
Example:
#### x86/x86-64
make install PREFIX=your_installation_directory
The default directory is /opt/OpenBLAS
## Support CPU & OS
Please read GotoBLAS_01Readme.txt
### Additional support CPU:
#### x86/x86-64:
- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes.
- **Intel Sandy Bridge**: Optimized Level-3 and Level-2 BLAS with AVX on x86-64.
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64.
- **Intel Skylake**: Optimized Level-3 and Level-2 BLAS with AVX512 and FMA on x86-64.
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thank Werner Saar)
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
#### MIPS64
#### MIPS64:
- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.
- **ICT Loongson 3B**: Experimental
#### ARM
#### ARM:
- **ARMV6**: Optimized BLAS for vfpv2 and vfpv3-d16 ( e.g. BCM2835, Cortex M0+ )
- **ARMV7**: Optimized BLAS for vfpv3-d32 ( e.g. Cortex A8, A9 and A15 )
- **ARMv6**: Optimized BLAS for vfpv2 and vfpv3-d16 (e.g. BCM2835, Cortex M0+)
- **ARMv7**: Optimized BLAS for vfpv3-d32 (e.g. Cortex A8, A9 and A15)
#### ARM64
- **ARMv8**: Experimental
#### ARM64:
- **ARMV8**: Experimental
- **ARM Cortex-A57**: Experimental
#### PPC/PPC64
- **POWER8**: Optmized Level-3 BLAS and some Level-1, only with `USE_OPENMP=1`
#### IBM zEnterprise System
- **Z13**: Optimized Level-3 BLAS and Level-1,2 (double precision)
### Supported OS
#### IBM zEnterprise System:
- **Z13**: Optimized Level-3 BLAS
### Support OS:
- **GNU/Linux**
- **MinGW or Visual Studio (CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
- **Darwin/macOS**: Experimental. Although GotoBLAS2 supports Darwin, we are not macOS experts.
- **FreeBSD**: Supported by the community. We don't actively test the library on this OS.
- **OpenBSD**: Supported by the community. We don't actively test the library on this OS.
- **DragonFly BSD**: Supported by the community. We don't actively test the library on this OS.
- **Android**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
- **MingWin or Visual Studio(CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X.
- **FreeBSD**: Supported by community. We didn't test the library on this OS.
- **Android**: Supported by community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
## Usage
## Usages
Link with libopenblas.a or -lopenblas for shared library.
Statically link with `libopenblas.a` or dynamically link with `-lopenblas` if OpenBLAS was
compiled as a shared library.
### Set the number of threads with environment variables.
### Setting the number of threads using environment variables
Examples:
Environment variables are used to specify a maximum number of threads.
For example,
export OPENBLAS_NUM_THREADS=4
```sh
export OPENBLAS_NUM_THREADS=4
export GOTO_NUM_THREADS=4
export OMP_NUM_THREADS=4
```
or
The priorities are `OPENBLAS_NUM_THREADS` > `GOTO_NUM_THREADS` > `OMP_NUM_THREADS`.
export GOTO_NUM_THREADS=4
If you compile this library with `USE_OPENMP=1`, you should set the `OMP_NUM_THREADS`
environment variable; OpenBLAS ignores `OPENBLAS_NUM_THREADS` and `GOTO_NUM_THREADS` when
compiled with `USE_OPENMP=1`.
or
### Setting the number of threads at runtime
export OMP_NUM_THREADS=4
We provide the following functions to control the number of threads at runtime:
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
```c
void goto_set_num_threads(int num_threads);
void openblas_set_num_threads(int num_threads);
```
If you compile this lib with USE_OPENMP=1, you should set OMP_NUM_THREADS environment variable. OpenBLAS ignores OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS with USE_OPENMP=1.
If you compile this library with `USE_OPENMP=1`, you should use the above functions too.
### Set the number of threads on runtime.
## Reporting bugs
We provided the below functions to control the number of threads on runtime.
Please submit an issue in https://github.com/xianyi/OpenBLAS/issues.
void goto_set_num_threads(int num_threads);
void openblas_set_num_threads(int num_threads);
If you compile this lib with USE_OPENMP=1, you should use the above functions, too.
## Report Bugs
Please add a issue in https://github.com/xianyi/OpenBLAS/issues
## Contact
* OpenBLAS users mailing list: https://groups.google.com/forum/#!forum/openblas-users
* OpenBLAS developers mailing list: https://groups.google.com/forum/#!forum/openblas-dev
## Change log
Please see Changelog.txt to view the differences between OpenBLAS and GotoBLAS2 1.13 BSD version.
## ChangeLog
Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version.
## Troubleshooting
* Please read the [FAQ](https://github.com/xianyi/OpenBLAS/wiki/Faq) first.
* Please use GCC version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD.
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture.
Clang 3.0 will generate the wrong AVX binary code.
* Please use GCC version 6 or LLVM version 6 and above to compile Skylake AVX512 kernels.
* The number of CPUs/cores should less than or equal to 256. On Linux `x86_64` (`amd64`),
there is experimental support for up to 1024 CPUs/cores and 128 numa nodes if you build
the library with `BIGNUMA=1`.
* OpenBLAS does not set processor affinity by default.
On Linux, you can enable processor affinity by commenting out the line `NO_AFFINITY=1` in
Makefile.rule. However, note that this may cause
[a conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
* On Loongson 3A, `make test` may fail with a `pthread_create` error (`EAGAIN`).
However, it will be okay when you run the same test case on the shell.
* Please read [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) at first.
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
* The number of CPUs/Cores should less than or equal to 256. On Linux x86_64(amd64), there is experimental support for up to 1024 CPUs/Cores and 128 numa nodes if you build the library with BIGNUMA=1.
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting the line NO_AFFINITY=1 in Makefile.rule. But this may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
* On Loongson 3A. make test would be failed because of pthread_create error. The error code is EAGAIN. However, it will be OK when you run the same testcase on shell.
## Contributing
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue
to start a discussion around a feature idea or a bug.
2. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
3. Write a test which shows that the bug was fixed or that the feature works as expected.
4. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue to start a discussion around a feature idea or a bug.
1. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
1. Write a test which shows that the bug was fixed or that the feature works as expected.
1. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.
## Donation
Please read [this wiki page](https://github.com/xianyi/OpenBLAS/wiki/Donation).

View File

@@ -20,7 +20,6 @@ DUNNINGTON
NEHALEM
SANDYBRIDGE
HASWELL
SKYLAKEX
ATOM
b)AMD CPU:
@@ -57,7 +56,6 @@ CELL
3.MIPS CPU:
P5600
1004K
4.MIPS64 CPU:
SICORTEX
@@ -65,7 +63,6 @@ LOONGSON3A
LOONGSON3B
I6400
P6600
I6500
5.IA64 CPU:
ITANIUM2
@@ -83,14 +80,8 @@ ARMV5
8.ARM 64-bit CPU:
ARMV8
CORTEXA53
CORTEXA57
CORTEXA72
CORTEXA73
FALKOR
VULCAN
THUNDERX
THUNDERX2T99
9.System Z:
ZARCH_GENERIC
Z13

View File

@@ -14,20 +14,6 @@ Please build OpenBLAS with larger `NUM_THREADS`. For example, `make
NUM_THREADS=32` or `make NUM_THREADS=64`. In `Makefile.system`, we will set
`MAX_CPU_NUMBER=NUM_THREADS`.
Despite its name, and due to the use of memory buffers in functions like SGEMM,
the setting of NUM_THREADS can be relevant even for a single-threaded build
of OpenBLAS, if such functions get called by multiple threads of a program
that uses OpenBLAS. In some cases, the affected code may simply crash or throw
a segmentation fault without displaying the above warning first.
Note that the number of threads used at runtime can be altered to differ from the
value NUM_THREADS was set to at build time. At runtime, the actual number of
threads can be set anywhere from 1 to the build's NUM_THREADS (note however,
that this does not change the number of memory buffers that will be allocated,
which is set at build time). The number of threads for a process can be set by
using the mechanisms described below.
#### How can I use OpenBLAS in multi-threaded applications?
If your application is already multi-threaded, it will conflict with OpenBLAS

View File

@@ -5,8 +5,6 @@ version: 0.2.19.{build}
platform:
- x64
os: Visual Studio 2017
configuration: Release
clone_folder: c:\projects\OpenBLAS
@@ -14,53 +12,33 @@ clone_folder: c:\projects\OpenBLAS
init:
- git config --global core.autocrlf input
build:
project: OpenBLAS.sln
clone_depth: 5
#branches to build
branches:
only:
- master
- develop
- cmake
skip_tags: true
matrix:
fast_finish: false
fast_finish: true
skip_commits:
# Add [av skip] to commit messages
message: /\[av skip\]/
environment:
global:
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
matrix:
- COMPILER: clang-cl
WITH_FORTRAN: yes
- COMPILER: clang-cl
DYNAMIC_ARCH: ON
WITH_FORTRAN: no
- COMPILER: cl
install:
- if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
- if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
- if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake
- if [%WITH_FORTRAN%]==[no] conda install --yes --quiet ninja
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet -c isuruf kitware-ninja
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flang
- if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
- if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
- if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
before_build:
- ps: if (-Not (Test-Path .\build)) { mkdir build }
- cd build
- if [%COMPILER%]==[cl] cmake -G "Visual Studio 15 2017 Win64" ..
- if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl ..
- if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON ..
build_script:
- cmake --build .
- echo Running cmake...
- cd c:\projects\OpenBLAS
- cmake -G "Visual Studio 12 Win64" .
test_script:
- echo Running Test
- cd utest
- cd c:\projects\OpenBLAS\utest
- openblas_utest

View File

@@ -121,15 +121,13 @@ static void *huge_malloc(BLASLONG size){
int main(int argc, char *argv[]){
FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 0.0};
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {0.0, 0.0};
char transa = 'N';
char transb = 'N';
blasint m, n, k, i, j, lda, ldb, ldc;
char trans='N';
blasint m, n, i, j;
int loops = 1;
int has_param_m = 0;
int has_param_n = 0;
int has_param_k = 0;
int has_param_n=0;
int l;
char *p;
int from = 1;
@@ -137,108 +135,86 @@ int main(int argc, char *argv[]){
int step = 1;
struct timeval start, stop;
double time1, timeg;
double time1,timeg;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++; }
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; }
if (argc > 0) { step = atol(*argv); argc--; argv++; }
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_TRANS"))) {
transa=*p;
transb=*p;
}
if ((p = getenv("OPENBLAS_TRANSA"))) {
transa=*p;
}
if ((p = getenv("OPENBLAS_TRANSB"))) {
transb=*p;
}
TOUPPER(transa);
TOUPPER(transb);
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
fprintf(stderr, "From : %3d To : %3d Step=%d : Transa=%c : Transb=%c\n", from, to, step, transa, transb);
fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c\n", from, to, step, trans);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
p = getenv("OPENBLAS_LOOPS");
if ( p != NULL ) {
loops = atoi(p);
}
if ( p != NULL )
loops = atoi(p);
if ((p = getenv("OPENBLAS_PARAM_M"))) {
m = atoi(p);
has_param_m=1;
} else {
m = to;
}
if ((p = getenv("OPENBLAS_PARAM_N"))) {
n = atoi(p);
has_param_n=1;
} else {
n = to;
}
if ((p = getenv("OPENBLAS_PARAM_K"))) {
k = atoi(p);
has_param_k=1;
} else {
k = to;
}
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * m * k * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * k * n * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
n = atoi(p);
has_param_n=1;
}
#ifdef linux
srandom(getpid());
#endif
for (i = 0; i < m * k * COMPSIZE; i++) {
a[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (i = 0; i < k * n * COMPSIZE; i++) {
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (i = 0; i < m * n * COMPSIZE; i++) {
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
fprintf(stderr, " SIZE Flops Time\n");
for(j = 0; j < to; j++){
for(i = 0; i < to * COMPSIZE; i++){
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}
fprintf(stderr, " SIZE Flops Time\n");
for(m = from; m <= to; m += step)
{
for (i = from; i <= to; i += step) {
timeg=0;
if (!has_param_m) { m = i; }
if (!has_param_n) { n = i; }
if (!has_param_k) { k = i; }
if ( has_param_n == 1 && n <= m )
n=n;
else
n=m;
if (transa == 'N') { lda = m; }
else { lda = k; }
if (transb == 'N') { ldb = k; }
else { ldb = n; }
ldc = m;
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
gettimeofday( &start, (struct timezone *)0);
for (j=0; j<loops; j++) {
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
}
for (l=0; l<loops; l++)
{
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
}
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg = time1/loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)k * (double)m * (double)n / timeg * 1.e-6, time1);
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1);
}
return 0;

View File

@@ -122,7 +122,7 @@ int main(int argc, char *argv[]){
FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {1.0, 0.0};
FLOAT beta [] = {1.0, 1.0};
char trans='N';
blasint m, i, j;
blasint inc_x=1,inc_y=1;

22
c_check
View File

@@ -54,8 +54,6 @@ $compiler = GCC if ($compiler eq "");
$os = Linux if ($data =~ /OS_LINUX/);
$os = FreeBSD if ($data =~ /OS_FREEBSD/);
$os = NetBSD if ($data =~ /OS_NETBSD/);
$os = OpenBSD if ($data =~ /OS_OPENBSD/);
$os = DragonFly if ($data =~ /OS_DRAGONFLY/);
$os = Darwin if ($data =~ /OS_DARWIN/);
$os = SunOS if ($data =~ /OS_SUNOS/);
$os = AIX if ($data =~ /OS_AIX/);
@@ -64,7 +62,6 @@ $os = WINNT if ($data =~ /OS_WINNT/);
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/);
$os = Interix if ($data =~ /OS_INTERIX/);
$os = Android if ($data =~ /OS_ANDROID/);
$os = Haiku if ($data =~ /OS_HAIKU/);
$architecture = x86 if ($data =~ /ARCH_X86/);
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
@@ -202,21 +199,6 @@ $architecture = zarch if ($data =~ /ARCH_ZARCH/);
$binformat = bin32;
$binformat = bin64 if ($data =~ /BINARY_64/);
$no_avx512= 0;
if (($architecture eq "x86") || ($architecture eq "x86_64")) {
$code = '"vbroadcastss -4 * 4(%rsi), %zmm2"';
print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n";
$args = " -march=skylake-avx512 -o $tmpf.o -x c $tmpf";
my @cmd = ("$compiler_name $args >/dev/null 2>/dev/null");
system(@cmd) == 0;
if ($? != 0) {
$no_avx512 = 1;
} else {
$no_avx512 = 0;
}
unlink("tmpf.o");
}
$data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
$data =~ /globl\s([_\.]*)(.*)/;
@@ -224,6 +206,7 @@ $data =~ /globl\s([_\.]*)(.*)/;
$need_fu = $1;
$cross = 0;
$cross = 1 if ($os ne $hostos);
if ($architecture ne $hostarch) {
$cross = 1;
@@ -231,8 +214,6 @@ if ($architecture ne $hostarch) {
$cross = 0 if (($hostarch eq "mips64") && ($architecture eq "mips"));
}
$cross = 1 if ($os ne $hostos);
$openmp = "" if $ENV{USE_OPENMP} != 1;
$linker_L = "";
@@ -305,7 +286,6 @@ print MAKEFILE "CROSS=1\n" if $cross != 0;
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1;
$os =~ tr/[a-z]/[A-Z]/;
$architecture =~ tr/[a-z]/[A-Z]/;

176
cblas.h
View File

@@ -51,57 +51,51 @@ typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=1
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
typedef CBLAS_ORDER CBLAS_LAYOUT;
float cblas_sdsdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
double cblas_dsdot (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
float cblas_sdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
double cblas_ddot(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
openblas_complex_float cblas_cdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy);
openblas_complex_float cblas_cdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
openblas_complex_float cblas_cdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy);
openblas_complex_double cblas_zdotu(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
openblas_complex_double cblas_zdotc(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy);
void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *y, OPENBLAS_CONST blasint incy, void *ret);
void cblas_cdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret);
void cblas_cdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *y, OPENBLAS_CONST blasint incy, openblas_complex_float *ret);
void cblas_zdotu_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret);
void cblas_zdotc_sub(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *y, OPENBLAS_CONST blasint incy, openblas_complex_double *ret);
float cblas_sasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
double cblas_dasum (OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
float cblas_scasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
double cblas_dzasum(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
float cblas_snrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
double cblas_dnrm2 (OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX);
double cblas_dznrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX);
float cblas_scnrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX);
double cblas_dznrm2(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX);
CBLAS_INDEX cblas_isamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
CBLAS_INDEX cblas_idamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
CBLAS_INDEX cblas_isamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
CBLAS_INDEX cblas_idamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
CBLAS_INDEX cblas_icamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
CBLAS_INDEX cblas_izamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
CBLAS_INDEX cblas_icamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
CBLAS_INDEX cblas_izamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
void cblas_saxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
void cblas_zcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
void cblas_zcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
void cblas_sswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
void cblas_dswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
void cblas_cswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
void cblas_zswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
void cblas_cswap(OPENBLAS_CONST blasint n, float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
void cblas_zswap(OPENBLAS_CONST blasint n, double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
void cblas_srot(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s);
void cblas_drot(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s);
@@ -117,59 +111,59 @@ void cblas_drotmg(double *d1, double *d2, double *b1, OPENBLAS_CONST double b2,
void cblas_sscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX);
void cblas_dscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX);
void cblas_cscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, void *X, OPENBLAS_CONST blasint incX);
void cblas_zscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, void *X, OPENBLAS_CONST blasint incX);
void cblas_csscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, void *X, OPENBLAS_CONST blasint incX);
void cblas_zdscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, void *X, OPENBLAS_CONST blasint incX);
void cblas_cscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, float *X, OPENBLAS_CONST blasint incX);
void cblas_zscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, double *X, OPENBLAS_CONST blasint incX);
void cblas_csscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, float *X, OPENBLAS_CONST blasint incX);
void cblas_zdscal(OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, double *X, OPENBLAS_CONST blasint incX);
void cblas_sgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
void cblas_dgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
void cblas_cgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy);
void cblas_zgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy);
void cblas_sger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
void cblas_dger (OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
void cblas_cgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
void cblas_cgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
void cblas_zgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
void cblas_zgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
void cblas_cgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
void cblas_cgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
void cblas_zgeru(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
void cblas_zgerc(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
void cblas_strsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
void cblas_dtrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
void cblas_ctrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
void cblas_ztrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
void cblas_ctrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
void cblas_ztrsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
void cblas_strmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
void cblas_dtrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
void cblas_ctrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
void cblas_ztrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
void cblas_ctrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
void cblas_ztrmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
void cblas_ssyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda);
void cblas_dsyr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda);
void cblas_cher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A, OPENBLAS_CONST blasint lda);
void cblas_zher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A, OPENBLAS_CONST blasint lda);
void cblas_cher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A, OPENBLAS_CONST blasint lda);
void cblas_zher(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *A, OPENBLAS_CONST blasint lda);
void cblas_ssyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo,OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X,
OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
void cblas_dsyr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,
OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
void cblas_cher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX,
OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
void cblas_zher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX,
OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *A, OPENBLAS_CONST blasint lda);
void cblas_cher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX,
OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A, OPENBLAS_CONST blasint lda);
void cblas_zher2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX,
OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A, OPENBLAS_CONST blasint lda);
void cblas_sgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
void cblas_dgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
void cblas_cgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
void cblas_zgbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
OPENBLAS_CONST blasint KL, OPENBLAS_CONST blasint KU, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
void cblas_ssbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
@@ -182,45 +176,45 @@ void cblas_stbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLA
void cblas_dtbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
void cblas_ctbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
void cblas_ztbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
void cblas_stbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
void cblas_dtbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
void cblas_ctbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *X, OPENBLAS_CONST blasint incX);
void cblas_ztbsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *X, OPENBLAS_CONST blasint incX);
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *X, OPENBLAS_CONST blasint incX);
void cblas_stpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
void cblas_dtpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
void cblas_ctpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
void cblas_ztpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
void cblas_stpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
void cblas_dtpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
void cblas_ctpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
OPENBLAS_CONST blasint N, OPENBLAS_CONST float *Ap, float *X, OPENBLAS_CONST blasint incX);
void cblas_ztpsv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_DIAG Diag,
OPENBLAS_CONST blasint N, OPENBLAS_CONST void *Ap, void *X, OPENBLAS_CONST blasint incX);
OPENBLAS_CONST blasint N, OPENBLAS_CONST double *Ap, double *X, OPENBLAS_CONST blasint incX);
void cblas_ssymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A,
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float beta, float *Y, OPENBLAS_CONST blasint incY);
void cblas_dsymv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A,
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double beta, double *Y, OPENBLAS_CONST blasint incY);
void cblas_chemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A,
OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
void cblas_zhemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A,
OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
void cblas_chemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A,
OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
void cblas_zhemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A,
OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
void cblas_sspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *Ap,
@@ -231,36 +225,36 @@ void cblas_dspmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLA
void cblas_sspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *Ap);
void cblas_dspr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, double *Ap);
void cblas_chpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, void *A);
void cblas_zhpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST void *X,OPENBLAS_CONST blasint incX, void *A);
void cblas_chpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, float *A);
void cblas_zhpr(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X,OPENBLAS_CONST blasint incX, double *A);
void cblas_sspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *A);
void cblas_dspr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *A);
void cblas_chpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *Ap);
void cblas_zhpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *Y, OPENBLAS_CONST blasint incY, void *Ap);
void cblas_chpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *Y, OPENBLAS_CONST blasint incY, float *Ap);
void cblas_zhpr2(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *Y, OPENBLAS_CONST blasint incY, double *Ap);
void cblas_chbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
void cblas_zhbmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
void cblas_chpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *Ap, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *Ap, OPENBLAS_CONST float *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST float *beta, float *Y, OPENBLAS_CONST blasint incY);
void cblas_zhpmv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint N,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *Ap, OPENBLAS_CONST void *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST void *beta, void *Y, OPENBLAS_CONST blasint incY);
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *Ap, OPENBLAS_CONST double *X, OPENBLAS_CONST blasint incX, OPENBLAS_CONST double *beta, double *Y, OPENBLAS_CONST blasint incY);
void cblas_sgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_dgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_cgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_cgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
@@ -268,60 +262,60 @@ void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA
void cblas_dsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_csymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_zsymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_ssyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_dsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_csyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_zsyrk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_ssyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_dsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_csyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_zsyr2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans,
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_strmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
void cblas_dtrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
void cblas_ctrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
void cblas_ztrmm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
void cblas_strsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
void cblas_dtrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
void cblas_ctrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, float *B, OPENBLAS_CONST blasint ldb);
void cblas_ztrsm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA,
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, void *B, OPENBLAS_CONST blasint ldb);
OPENBLAS_CONST enum CBLAS_DIAG Diag, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, double *B, OPENBLAS_CONST blasint ldb);
void cblas_chemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float *beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_zhemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double *beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_cherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST float alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_zherk(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST double alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_cher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_zher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, void *C, OPENBLAS_CONST blasint ldc);
OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_xerbla(blasint p, char *rout, char *form, ...);
@@ -331,9 +325,9 @@ void cblas_saxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS
void cblas_daxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy);
void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy);
void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST void *beta, void *y, OPENBLAS_CONST blasint incy);
void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy);
void cblas_somatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, OPENBLAS_CONST float *a,
OPENBLAS_CONST blasint clda, float *b, OPENBLAS_CONST blasint cldb);

View File

@@ -1,79 +0,0 @@
# OpenBLASConfig.cmake
# --------------------
#
# OpenBLAS cmake module.
# This module sets the following variables in your project::
#
# OpenBLAS_FOUND - true if OpenBLAS and all required components found on the system
# OpenBLAS_VERSION - OpenBLAS version in format Major.Minor.Release
# OpenBLAS_INCLUDE_DIRS - Directory where OpenBLAS header is located.
# OpenBLAS_INCLUDE_DIR - same as DIRS
# OpenBLAS_LIBRARIES - OpenBLAS library to link against.
# OpenBLAS_LIBRARY - same as LIBRARIES
#
#
# Available components::
#
## shared - search for only shared library
## static - search for only static library
# serial - search for unthreaded library
# pthread - search for native pthread threaded library
# openmp - search for OpenMP threaded library
#
#
# Exported targets::
#
# If OpenBLAS is found, this module defines the following :prop_tgt:`IMPORTED`
## target. Target is shared _or_ static, so, for both, use separate, not
## overlapping, installations. ::
#
# OpenBLAS::OpenBLAS - the main OpenBLAS library #with header & defs attached.
#
#
# Suggested usage::
#
# find_package(OpenBLAS)
# find_package(OpenBLAS 0.2.20 EXACT CONFIG REQUIRED COMPONENTS pthread)
#
#
# The following variables can be set to guide the search for this package::
#
# OpenBLAS_DIR - CMake variable, set to directory containing this Config file
# CMAKE_PREFIX_PATH - CMake variable, set to root directory of this package
# PATH - environment variable, set to bin directory of this package
# CMAKE_DISABLE_FIND_PACKAGE_OpenBLAS - CMake variable, disables
# find_package(OpenBLAS) when not REQUIRED, perhaps to force internal build
@PACKAGE_INIT@
set(PN OpenBLAS)
# need to check that the @USE_*@ evaluate to something cmake can perform boolean logic upon
if(@USE_OPENMP@)
set(${PN}_openmp_FOUND 1)
elseif(@USE_THREAD@)
set(${PN}_pthread_FOUND 1)
else()
set(${PN}_serial_FOUND 1)
endif()
check_required_components(${PN})
#-----------------------------------------------------------------------------
# Don't include targets if this file is being picked up by another
# project which has already built this as a subproject
#-----------------------------------------------------------------------------
if(NOT TARGET ${PN}::OpenBLAS)
include("${CMAKE_CURRENT_LIST_DIR}/${PN}Targets.cmake")
get_property(_loc TARGET ${PN}::OpenBLAS PROPERTY LOCATION)
set(${PN}_LIBRARY ${_loc})
get_property(_ill TARGET ${PN}::OpenBLAS PROPERTY INTERFACE_LINK_LIBRARIES)
set(${PN}_LIBRARIES ${_ill})
get_property(_id TARGET ${PN}::OpenBLAS PROPERTY INCLUDE_DIRECTORIES)
set(${PN}_INCLUDE_DIR ${_id})
get_property(_iid TARGET ${PN}::OpenBLAS PROPERTY INTERFACE_INCLUDE_DIRECTORIES)
set(${PN}_INCLUDE_DIRS ${_iid})
endif()

View File

@@ -3,9 +3,9 @@
## Description: Ported from portion of OpenBLAS/Makefile.system
## Sets various variables based on architecture.
if (X86 OR X86_64)
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64")
if (X86)
if (${ARCH} STREQUAL "x86")
if (NOT BINARY)
set(NO_BINARY_MODE 1)
endif ()
@@ -33,46 +33,47 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
endif ()
if (USE_OPENMP)
# USE_SIMPLE_THREADED_LEVEL3 = 1
# NO_AFFINITY = 1
find_package(OpenMP REQUIRED)
if (OpenMP_FOUND)
set(CCOMMON_OPT "${CCOMMON_OPT} ${OpenMP_C_FLAGS} -DUSE_OPENMP")
set(FCOMMON_OPT "${FCOMMON_OPT} ${OpenMP_Fortran_FLAGS}")
endif()
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
endif ()
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
message(WARNING "Clang doesn't support OpenMP yet.")
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
endif ()
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
endif ()
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
endif ()
if (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64")
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
set(CEXTRALIB "${CEXTRALIB} -lstdc++")
endif ()
if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE")
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
endif ()
endif ()
if (DYNAMIC_ARCH)
if (ARM64)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99)
endif ()
if (X86)
set(DYNAMIC_CORE KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO)
if (${ARCH} STREQUAL "x86")
set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
endif ()
if (X86_64)
set(DYNAMIC_CORE PRESCOTT CORE2)
if (DYNAMIC_OLDER)
set (DYNAMIC_CORE ${DYNAMIC_CORE} PENRYN DUNNINGTON)
endif ()
set (DYNAMIC_CORE ${DYNAMIC_CORE} NEHALEM)
if (DYNAMIC_OLDER)
set (DYNAMIC_CORE ${DYNAMIC_CORE} OPTERON OPTERON_SSE3)
endif ()
set (DYNAMIC_CORE ${DYNAMIC_CORE} BARCELONA)
if (DYNAMIC_OLDER)
set (DYNAMIC_CORE ${DYNAMIC_CORE} BOBCAT ATOM NANO)
endif ()
if (${ARCH} STREQUAL "x86_64")
set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
if (NOT NO_AVX)
set(DYNAMIC_CORE ${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR)
set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
endif ()
if (NOT NO_AVX2)
set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN)
endif ()
if (NOT NO_AVX512)
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX)
set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL ZEN")
endif ()
endif ()
@@ -93,7 +94,7 @@ if (${ARCH} STREQUAL "ia64")
endif ()
endif ()
if (MIPS64)
if (${ARCH} STREQUAL "mips64")
set(NO_BINARY_MODE 1)
endif ()
@@ -102,12 +103,12 @@ if (${ARCH} STREQUAL "alpha")
set(BINARY_DEFINED 1)
endif ()
if (ARM)
if (${ARCH} STREQUAL "arm")
set(NO_BINARY_MODE 1)
set(BINARY_DEFINED 1)
endif ()
if (ARM64)
if (${ARCH} STREQUAL "arm64")
set(NO_BINARY_MODE 1)
set(BINARY_DEFINED 1)
endif ()

98
cmake/c_check.cmake Normal file
View File

@@ -0,0 +1,98 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from the OpenBLAS/c_check perl script.
## This is triggered by prebuild.cmake and runs before any of the code is built.
## Creates config.h and Makefile.conf.
# CMake vars set by this file:
# OSNAME (use CMAKE_SYSTEM_NAME)
# ARCH
# C_COMPILER (use CMAKE_C_COMPILER)
# BINARY32
# BINARY64
# FU
# CROSS_SUFFIX
# CROSS
# CEXTRALIB
# Defines set by this file:
# OS_
# ARCH_
# C_
# __32BIT__
# __64BIT__
# FUNDERSCORE
# PTHREAD_CREATE_FUNC
# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables.
set(FU "")
if(APPLE)
set(FU "_")
elseif(MSVC)
set(FU "_")
elseif(UNIX)
set(FU "")
endif()
# Convert CMake vars into the format that OpenBLAS expects
string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS)
if (${HOST_OS} STREQUAL "WINDOWS")
set(HOST_OS WINNT)
endif ()
# added by hpa - check size of void ptr to detect 64-bit compile
if (NOT DEFINED BINARY)
set(BINARY 32)
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
set(BINARY 64)
endif ()
endif ()
if (BINARY EQUAL 64)
set(BINARY64 1)
else ()
set(BINARY32 1)
endif ()
# CMake docs define these:
# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for.
# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on.
#
# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check
set(ARCH ${CMAKE_SYSTEM_PROCESSOR})
if (${ARCH} STREQUAL "AMD64")
set(ARCH "x86_64")
endif ()
# If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong
if (${ARCH} STREQUAL "x86_64" AND BINARY EQUAL 32)
set(ARCH x86)
endif ()
if (${ARCH} STREQUAL "X86")
set(ARCH x86)
endif ()
if (${ARCH} MATCHES "ppc")
set(ARCH power)
endif ()
set(COMPILER_ID ${CMAKE_CXX_COMPILER_ID})
if (${COMPILER_ID} STREQUAL "GNU")
set(COMPILER_ID "GCC")
endif ()
string(TOUPPER ${ARCH} UC_ARCH)
file(WRITE ${TARGET_CONF}
"#define OS_${HOST_OS}\t1\n"
"#define ARCH_${UC_ARCH}\t1\n"
"#define C_${COMPILER_ID}\t1\n"
"#define __${BINARY}BIT__\t1\n"
"#define FUNDERSCORE\t${FU}\n")
if (${HOST_OS} STREQUAL "WINDOWSSTORE")
file(APPEND ${TARGET_CONF}
"#define OS_WINNT\t1\n")
endif ()

View File

@@ -15,7 +15,7 @@ if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR
if (NO_BINARY_MODE)
if (MIPS64)
if (${ARCH} STREQUAL "mips64")
if (BINARY64)
set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64")
else ()
@@ -24,12 +24,17 @@ if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR
set(BINARY_DEFINED 1)
endif ()
if (${CORE} STREQUAL "LOONGSON3A" OR ${CORE} STREQUAL "LOONGSON3B")
if (${CORE} STREQUAL "LOONGSON3A")
set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
endif ()
if (CMAKE_SYSTEM_NAME STREQUAL "AIX")
if (${CORE} STREQUAL "LOONGSON3B")
set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
endif ()
if (${OSNAME} STREQUAL "AIX")
set(BINARY_DEFINED 1)
endif ()
endif ()
@@ -61,7 +66,7 @@ endif ()
if (${CMAKE_C_COMPILER} STREQUAL "OPEN64")
if (MIPS64)
if (${ARCH} STREQUAL "mips64")
if (NOT BINARY64)
set(CCOMMON_OPT "${CCOMMON_OPT} -n32")
@@ -89,10 +94,10 @@ endif ()
if (${CMAKE_C_COMPILER} STREQUAL "SUN")
set(CCOMMON_OPT "${CCOMMON_OPT} -w")
if (X86)
if (${ARCH} STREQUAL "x86")
set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
else ()
set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
endif ()
endif ()

View File

@@ -51,8 +51,7 @@ else()
endif()
add_custom_command(
OUTPUT ${PROJECT_BINARY_DIR}/openblas.def
#TARGET ${OpenBLAS_LIBNAME} PRE_LINK
TARGET ${OpenBLAS_LIBNAME} PRE_LINK
COMMAND perl
ARGS "${PROJECT_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
COMMENT "Create openblas.def file"

View File

@@ -20,6 +20,12 @@
# NEEDBUNDERSCORE
# NEED2UNDERSCORES
if (MSVC)
# had to do this for MSVC, else CMake automatically assumes I have ifort... -hpa
include(CMakeForceCompiler)
CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
endif ()
if (NOT NO_LAPACK)
enable_language(Fortran)
else()
@@ -28,13 +34,17 @@ else()
endif()
if (NOT ONLY_CBLAS)
# N.B. f_check is not cross-platform, so instead try to use CMake variables
# run f_check (appends to TARGET files)
# message(STATUS "Running f_check...")
# execute_process(COMMAND perl f_check ${TARGET_MAKE} ${TARGET_CONF} ${CMAKE_Fortran_COMPILER}
# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
# TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile
# TODO: set FEXTRALIB flags a la f_check?
set(BU "_")
file(APPEND ${TARGET_CONF_TEMP}
file(APPEND ${TARGET_CONF}
"#define BUNDERSCORE _\n"
"#define NEEDBUNDERSCORE 1\n"
"#define NEED2UNDERSCORES 0\n")
@@ -46,7 +56,7 @@ else ()
set(NO_FBLAS 1)
#set(F_COMPILER GFORTRAN) # CMake handles the fortran compiler
set(BU "_")
file(APPEND ${TARGET_CONF_TEMP}
file(APPEND ${TARGET_CONF}
"#define BUNDERSCORE _\n"
"#define NEEDBUNDERSCORE 1\n")
endif()

View File

@@ -3,15 +3,15 @@
## Description: Ported from portion of OpenBLAS/Makefile.system
## Sets Fortran related variables.
if (INTERFACE64)
set(SUFFIX64 64)
set(SUFFIX64_UNDERSCORE _64)
endif()
if (${F_COMPILER} STREQUAL "FLANG")
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FLANG")
if (BINARY64 AND INTERFACE64)
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
if (BINARY64)
if (INTERFACE64)
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
endif ()
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
else ()
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
endif ()
if (USE_OPENMP)
set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
@@ -44,13 +44,13 @@ endif ()
if (${F_COMPILER} STREQUAL "GFORTRAN")
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -frecursive")
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
if (NOT NO_LAPACK)
set(EXTRALIB "{EXTRALIB} -lgfortran")
endif ()
if (NO_BINARY_MODE)
if (MIPS64)
if (${ARCH} STREQUAL "mips64")
if (BINARY64)
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
else ()
@@ -130,7 +130,7 @@ if (${F_COMPILER} STREQUAL "PATHSCALE")
endif ()
endif ()
if (NOT MIPS64)
if (NOT ${ARCH} STREQUAL "mips64")
if (NOT BINARY64)
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
else ()
@@ -158,7 +158,7 @@ if (${F_COMPILER} STREQUAL "OPEN64")
endif ()
endif ()
if (MIPS64)
if (${ARCH} STREQUAL "mips64")
if (NOT BINARY64)
set(FCOMMON_OPT "${FCOMMON_OPT} -n32")
@@ -189,7 +189,7 @@ endif ()
if (${F_COMPILER} STREQUAL "SUN")
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN")
if (X86)
if (${ARCH} STREQUAL "x86")
set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
else ()
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")

View File

@@ -1,485 +1,387 @@
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.
set(ALLAUX ilaenv.f ilaenv2stage.f ieeeck.f lsamen.f iparmq.f iparam2stage.F
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f
../INSTALL/ilaver.f xerbla_array.f
../INSTALL/slamch.f)
set(ALLAUX
ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f
ilaprec.f ilatrans.f ilauplo.f iladiag.f iparam2stage.F chla_transtype.f
../INSTALL/ilaver.f ../INSTALL/slamch.f
)
set(SCLAUX
sbdsdc.f
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
slagts.f slamrg.f slanst.f
slapy2.f slapy3.f slarnv.f
slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
slarrk.f slarrr.f slaneg.f
slartg.f slaruv.f slas2.f slascl.f
slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
ssteqr.f ssterf.f slaisnan.f sisnan.f
slartgp.f slartgs.f
../INSTALL/second_${TIMER}.f)
sbdsdc.f
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
slagts.f slamrg.f slanst.f
slapy2.f slapy3.f slarnv.f
slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
slarrk.f slarrr.f slaneg.f
slartg.f slaruv.f slas2.f slascl.f
slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
ssteqr.f ssterf.f slaisnan.f sisnan.f
slartgp.f slartgs.f
../INSTALL/second_${TIMER}.f
)
set(DZLAUX
dbdsdc.f
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
dlagts.f dlamrg.f dlanst.f
dlapy2.f dlapy3.f dlarnv.f
dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
dlarrk.f dlarrr.f dlaneg.f
dlartg.f dlaruv.f dlas2.f dlascl.f
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
dsteqr.f dsterf.f dlaisnan.f disnan.f
dlartgp.f dlartgs.f
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f)
dbdsdc.f dbdsvdx.f
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
dlagts.f dlamrg.f dlanst.f
dlapy2.f dlapy3.f dlarnv.f
dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
dlarrk.f dlarrr.f dlaneg.f
dlartg.f dlaruv.f dlas2.f dlascl.f
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
dsteqr.f dsterf.f dlaisnan.f disnan.f
dlartgp.f dlartgs.f
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f
dgetsls.f dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f
dtplqt2.f dtpmlqt.f dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f
dsytf2_rk.f dlasyf_rk.f dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f
dsytri_3x.f dsysv_rk.f dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f
dsbevx_2stage.f dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f
dsyevx_2stage.f dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F
dsytrd_sy2sb.f dlarfy.f
)
set(SLASRC
sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
sgehd2.f sgehrd.f sgelq2.f sgelqf.f
sgels.f sgelsd.f sgelss.f sgelsy.f sgeql2.f sgeqlf.f
sgeqp3.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f sgerq2.f sgerqf.f
sgesc2.f sgesdd.f sgesvd.f sgesvdx.f sgesvx.f sgetc2.f
sgetrf2.f sgetri.f
sggbak.f sggbal.f
sgges.f sgges3.f sggesx.f sggev.f sggev3.f sggevx.f
sggglm.f sgghrd.f sgghd3.f sgglse.f sggqrf.f
sggrqf.f sggsvd3.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
slansy.f slantb.f slantp.f slantr.f slanv2.f
slapll.f slapmt.f
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
slarrv.f slartv.f
slarz.f slarzb.f slarzt.f slasy2.f
slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
sorgrq.f sorgtr.f sorm2l.f sorm2r.f sorm22.f
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
spbstf.f spbsv.f spbsvx.f
spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
sposvx.f spotrf2.f spotri.f spstrf.f spstf2.f
sppcon.f sppequ.f
spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
sstevx.f ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
ssyswapr.f ssytrs.f ssytrs2.f
ssyconv.f ssyconvf.f ssyconvf_rook.f
ssysv_aa.f ssysv_aa_2stage.f ssytrf_aa.f ssytrf_aa_2stage.f ssytrs_aa.f ssytrs_aa_2stage.f
ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
ssytri_rook.f ssycon_rook.f ssysv_rook.f
ssytf2_rk.f ssytrf_rk.f ssytrs_3.f
ssytri_3.f ssytri_3x.f ssycon_3.f ssysv_rk.f
ssysv_aa.f ssytrf_aa.f ssytrs_aa.f
stbcon.f
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
stptrs.f
strcon.f strevc.f strevc3.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
strtrs.f stzrzf.f sstemr.f
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
sgeequb.f ssyequb.f spoequb.f sgbequb.f
sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f
sgelqt.f sgelqt3.f sgemlqt.f
sgetsls.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f
sgelq.f slaswlq.f slamswlq.f sgemlq.f
stplqt.f stplqt2.f stpmlqt.f
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f)
sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
DEPRECATED/sgegs.f DEPRECATED/sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
sgels.f sgelsd.f sgelss.f DEPRECATED/sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
sgeqp3.f DEPRECATED/sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvdx.f sgesvx.f
sgetc2.f sgetri.f sgetrf2.f
sggbak.f sggbal.f sgghd3.f sgges.f sgges3.f sggesx.f sggev.f sggev3.f sggevx.f
sggglm.f sgghrd.f sgglse.f sggqrf.f
sggrqf.f DEPRECATED/sggsvd.f sggsvd3.f DEPRECATED/sggsvp.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
DEPRECATED/slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
slansy.f slantb.f slantp.f slantr.f slanv2.f
slapll.f slapmt.f
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
slarrv.f slartv.f
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f DEPRECATED/slatzm.f
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
sorgrq.f sorgtr.f sorm2l.f sorm2r.f sorm22.f
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
spbstf.f spbsv.f spbsvx.f
spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
sposvx.f spstrf.f spstf2.f
sppcon.f sppequ.f
spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
sstevx.f
ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f
ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
ssytri_rook.f ssycon_rook.f ssysv_rook.f
stbcon.f
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
stptrs.f
strcon.f strevc.f strevc3.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
strtrs.f DEPRECATED/stzrqf.f stzrzf.f sstemr.f
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
sgeequb.f ssyequb.f spoequb.f sgbequb.f
sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
sgelq.f sgelqt.f sgelqt3.f sgemlq.f sgemlqt.f sgemqr.f sgeqr.f sgetsls.f
slamswlq.f slamtsqr.f slaswlq.f slatsqr.f stplqt.f stplqt2.f stpmlqt.f
ssysv_aa.f ssytrf_aa.f ssytrs_aa.f slasyf_aa.f ssytf2_rk.f slasyf_rk.f
ssytrf_rk.f ssytrs_3.f ssycon_3.f ssytri_3.f ssytri_3x.f ssysv_rk.f
ssb2st_kernels.f ssbev_2stage.f ssbevd_2stage.f ssbevx_2stage.f
ssyev_2stage.f ssyevd_2stage.f ssyevr_2stage.f ssyevx_2stage.f
ssygv_2stage.f ssytrd_2stage.f ssytrd_sb2st.F ssytrd_sy2sb.f slarfy.f
)
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
sla_syrfsx_extended.f sla_syamv.f sla_syrcond.f sla_syrpvgrw.f
sposvxx.f sporfsx.f sla_porfsx_extended.f sla_porcond.f
sla_porpvgrw.f sgbsvxx.f sgbrfsx.f sla_gbrfsx_extended.f
sla_gbamv.f sla_gbrcond.f sla_gbrpvgrw.f sla_lin_berr.f slarscl2.f
slascl2.f sla_wwaddw.f)
set(DSLASRC spotrs.f spotrf2.f)
set(CLASRC
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
cgehd2.f cgehrd.f cgelq2.f cgelqf.f
cgels.f cgelsd.f cgelss.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f cgerq2.f cgerqf.f
cgesc2.f cgesdd.f cgesvd.f cgesvdx.f
cgesvj.f cgejsv.f cgsvj0.f cgsvj1.f
cgesvx.f cgetc2.f cgetrf2.f
cgetri.f
cggbak.f cggbal.f
cgges.f cgges3.f cggesx.f cggev.f cggev3.f cggevx.f
cggglm.f cgghrd.f cgghd3.f cgglse.f cggqrf.f cggrqf.f
cggsvd3.f cggsvp3.f
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
chetf2.f chetrd.f
chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
chetrs.f chetrs2.f
chetf2_rook.f chetrf_rook.f chetri_rook.f
chetrs_rook.f checon_rook.f chesv_rook.f
chetf2_rk.f chetrf_rk.f chetri_3.f chetri_3x.f
chetrs_3.f checon_3.f chesv_rk.f
chesv_aa.f chesv_aa_2stage.f chetrf_aa.f chetrf_aa_2stage.f chetrs_aa.f chetrs_aa_2stage.f
chgeqz.f chpcon.f chpev.f chpevd.f
chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
chpsvx.f
chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
claed0.f claed7.f claed8.f
claein.f claesy.f claev2.f clags2.f clagtm.f
clahef.f clahef_rook.f clahef_rk.f clahef_aa.f clahqr.f
clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
clanhb.f clanhe.f
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
clarf.f clarfb.f clarfg.f clarfgp.f clarft.f
clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f
clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
cposv.f cposvx.f cpotrf2.f cpotri.f cpstrf.f cpstf2.f
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
crot.f cspcon.f csprfs.f cspsv.f
cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
cstegr.f cstein.f csteqr.f csycon.f
csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f
csytri2.f csytri2x.f csyswapr.f
csytrs.f csytrs2.f
csyconv.f csyconvf.f csyconvf_rook.f
csytf2_rook.f csytrf_rook.f csytrs_rook.f
csytri_rook.f csycon_rook.f csysv_rook.f
csytf2_rk.f csytrf_rk.f csytrf_aa.f csytrf_aa_2stage.f csytrs_3.f csytrs_aa.f csytrs_aa_2stage.f
csytri_3.f csytri_3x.f csycon_3.f csysv_rk.f csysv_aa.f csysv_aa_2stage.f
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
ctprfs.f ctptri.f
ctptrs.f ctrcon.f ctrevc.f ctrevc3.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
ctrsyl.f ctrtrs.f ctzrzf.f cung2l.f cung2r.f
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f cunm22.f
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f
cgelqt.f cgelqt3.f cgemlqt.f
cgetsls.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f
cgelq.f claswlq.f clamswlq.f cgemlq.f
ctplqt.f ctplqt2.f ctpmlqt.f
chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f)
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
csysvxx.f csyrfsx.f cla_syrfsx_extended.f cla_syamv.f
cla_syrcond_c.f cla_syrcond_x.f cla_syrpvgrw.f
cposvxx.f cporfsx.f cla_porfsx_extended.f
cla_porcond_c.f cla_porcond_x.f cla_porpvgrw.f
cgbsvxx.f cgbrfsx.f cla_gbrfsx_extended.f cla_gbamv.f
cla_gbrcond_c.f cla_gbrcond_x.f cla_gbrpvgrw.f
chesvxx.f cherfsx.f cla_herfsx_extended.f cla_heamv.f
cla_hercond_c.f cla_hercond_x.f cla_herpvgrw.f
cla_lin_berr.f clarscl2.f clascl2.f cla_wwaddw.f)
set(DLASRC
dbdsvdx.f dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
dgehd2.f dgehrd.f dgelq2.f dgelqf.f
dgels.f dgelsd.f dgelss.f dgelsy.f dgeql2.f dgeqlf.f
dgeqp3.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f dgerq2.f dgerqf.f
dgesc2.f dgesdd.f dgesvd.f dgesvdx.f dgesvx.f dgetc2.f
dgetrf2.f dgetri.f
dggbak.f dggbal.f
dgges.f dgges3.f dggesx.f dggev.f dggev3.f dggevx.f
dggglm.f dgghrd.f dgghd3.f dgglse.f dggqrf.f
dggrqf.f dggsvd3.f dggsvp3.f dgtcon.f dgtrfs.f dgtsv.f
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
dlapll.f dlapmt.f
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
dlargv.f dlarrv.f dlartv.f
dlarz.f dlarzb.f dlarzt.f dlasy2.f
dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
dorgrq.f dorgtr.f dorm2l.f dorm2r.f dorm22.f
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
dpbstf.f dpbsv.f dpbsvx.f
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
dposvx.f dpotrf2.f dpotri.f dpotrs.f dpstrf.f dpstf2.f
dppcon.f dppequ.f
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
dstevx.f dsycon.f dsyev.f dsyevd.f dsyevr.f
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
dsysv.f dsysvx.f
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytrs.f dsytrs2.f
dsytri2.f dsytri2x.f dsyswapr.f
dsyconv.f dsyconvf.f dsyconvf_rook.f
dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
dsytri_rook.f dsycon_rook.f dsysv_rook.f
dsytf2_rk.f dsytrf_rk.f dsytrs_3.f
dsytri_3.f dsytri_3x.f dsycon_3.f dsysv_rk.f
dsysv_aa.f dsysv_aa_2stage.f dsytrf_aa.f dsytrf_aa_2stage.f dsytrs_aa.f dsytrs_aa_2stage.f
dtbcon.f
dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
dtptrs.f
dtrcon.f dtrevc.f dtrevc3.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
dtrtrs.f dtzrzf.f dstemr.f
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f
dgeequb.f dsyequb.f dpoequb.f dgbequb.f
dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f
dgelqt.f dgelqt3.f dgemlqt.f
dgetsls.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f
dgelq.f dlaswlq.f dlamswlq.f dgemlq.f
dtplqt.f dtplqt2.f dtpmlqt.f
dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f)
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
dla_syrfsx_extended.f dla_syamv.f dla_syrcond.f dla_syrpvgrw.f
dposvxx.f dporfsx.f dla_porfsx_extended.f dla_porcond.f
dla_porpvgrw.f dgbsvxx.f dgbrfsx.f dla_gbrfsx_extended.f
dla_gbamv.f dla_gbrcond.f dla_gbrpvgrw.f dla_lin_berr.f dlarscl2.f
dlascl2.f dla_wwaddw.f)
set(ZLASRC
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
zgehd2.f zgehrd.f zgelq2.f zgelqf.f
zgels.f zgelsd.f zgelss.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
zgesc2.f zgesdd.f zgesvd.f zgesvdx.f zgesvx.f
zgesvj.f zgejsv.f zgsvj0.f zgsvj1.f
zgetc2.f zgetrf2.f
zgetri.f
zggbak.f zggbal.f
zgges.f zgges3.f zggesx.f zggev.f zggev3.f zggevx.f
zggglm.f zgghrd.f zgghd3.f zgglse.f zggqrf.f zggrqf.f
zggsvd3.f zggsvp3.f
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f
zhetf2.f zhetrd.f
zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
zhetrs.f zhetrs2.f
zhetf2_rook.f zhetrf_rook.f zhetri_rook.f
zhetrs_rook.f zhecon_rook.f zhesv_rook.f
zhetf2_rk.f zhetrf_rk.f zhetri_3.f zhetri_3x.f
zhetrs_3.f zhecon_3.f zhesv_rk.f
zhesv_aa.f zhesv_aa_2stage.f zhetrf_aa.f zhetrf_aa_2stage.f zhetrs_aa.f zhetrs_aa_2stage.f
zhgeqz.f zhpcon.f zhpev.f zhpevd.f
zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
zhpsvx.f
zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
zlaed0.f zlaed7.f zlaed8.f
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
zlahef.f zlahef_rook.f zlahef_rk.f zlahef_aa.f zlahqr.f
zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
zlangt.f zlanhb.f
zlanhe.f
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
zlarcm.f zlarf.f zlarfb.f
zlarfg.f zlarfgp.f zlarft.f
zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
zlassq.f zlasyf.f zlasyf_rook.f zlasyf_rk.f zlasyf_aa.f
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
zposv.f zposvx.f zpotrf2.f zpotri.f zpotrs.f zpstrf.f zpstf2.f
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
zrot.f zspcon.f zsprfs.f zspsv.f
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
zstegr.f zstein.f zsteqr.f zsycon.f
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f
zsytri2.f zsytri2x.f zsyswapr.f
zsytrs.f zsytrs2.f
zsyconv.f zsyconvf.f zsyconvf_rook.f
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f zsytrs_aa.f zsytrs_aa_2stage.f
zsytri_rook.f zsycon_rook.f zsysv_rook.f
zsytf2_rk.f zsytrf_rk.f zsytrf_aa.f zsytrf_aa_2stage.f zsytrs_3.f
zsytri_3.f zsytri_3x.f zsycon_3.f zsysv_rk.f zsysv_aa.f zsysv_aa_2stage.f
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
ztprfs.f ztptri.f
ztptrs.f ztrcon.f ztrevc.f ztrevc3.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
ztrsyl.f ztrtrs.f ztzrzf.f zung2l.f
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f zunm22.f
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
zunmtr.f zupgtr.f
zupmtr.f izmax1.f dzsum1.f zstemr.f
zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f
ztplqt.f ztplqt2.f ztpmlqt.f
zgelqt.f zgelqt3.f zgemlqt.f
zgetsls.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f
zgelq.f zlaswlq.f zlamswlq.f zgemlq.f
zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f)
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f
zla_syrfsx_extended.f zla_syamv.f zla_syrcond_c.f zla_syrcond_x.f
zla_syrpvgrw.f zposvxx.f zporfsx.f zla_porfsx_extended.f
zla_porcond_c.f zla_porcond_x.f zla_porpvgrw.f zgbsvxx.f zgbrfsx.f
zla_gbrfsx_extended.f zla_gbamv.f zla_gbrcond_c.f zla_gbrcond_x.f
zla_gbrpvgrw.f zhesvxx.f zherfsx.f zla_herfsx_extended.f
zla_heamv.f zla_hercond_c.f zla_hercond_x.f zla_herpvgrw.f
zla_lin_berr.f zlarscl2.f zlascl2.f zla_wwaddw.f)
if(USE_XBLAS)
set(ALLXOBJ ${SXLASRC} ${DXLASRC} ${CXLASRC} ${ZXLASRC})
endif()
list(APPEND SLASRC DEPRECATED/sgegs.f DEPRECATED/sgegv.f
DEPRECATED/sgeqpf.f DEPRECATED/sgelsx.f DEPRECATED/sggsvd.f
DEPRECATED/sggsvp.f DEPRECATED/slahrd.f DEPRECATED/slatzm.f DEPRECATED/stzrqf.f)
list(APPEND DLASRC DEPRECATED/dgegs.f DEPRECATED/dgegv.f
DEPRECATED/dgeqpf.f DEPRECATED/dgelsx.f DEPRECATED/dggsvd.f
DEPRECATED/dggsvp.f DEPRECATED/dlahrd.f DEPRECATED/dlatzm.f DEPRECATED/dtzrqf.f)
list(APPEND CLASRC DEPRECATED/cgegs.f DEPRECATED/cgegv.f
DEPRECATED/cgeqpf.f DEPRECATED/cgelsx.f DEPRECATED/cggsvd.f
DEPRECATED/cggsvp.f DEPRECATED/clahrd.f DEPRECATED/clatzm.f DEPRECATED/ctzrqf.f)
list(APPEND ZLASRC DEPRECATED/zgegs.f DEPRECATED/zgegv.f
DEPRECATED/zgeqpf.f DEPRECATED/zgelsx.f DEPRECATED/zggsvd.f
DEPRECATED/zggsvp.f DEPRECATED/zlahrd.f DEPRECATED/zlatzm.f DEPRECATED/ztzrqf.f)
message(STATUS "Building deprecated routines")
set(DSLASRC spotrs.f)
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
DEPRECATED/cgegs.f DEPRECATED/cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
cgels.f cgelsd.f cgelss.f DEPRECATED/cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
DEPRECATED/cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
cgesvx.f cgetc2.f cgetri.f
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
cgghrd.f cgglse.f cggqrf.f cggrqf.f
DEPRECATED/cggsvd.f DEPRECATED/cggsvp.f
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
chetf2.f chetrd.f
chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
chetrs.f chetrs2.f
chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f
chgeqz.f chpcon.f chpev.f chpevd.f
chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
chpsvx.f
chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
claed0.f claed7.f claed8.f
claein.f claesy.f claev2.f clags2.f clagtm.f
clahef.f clahef_rook.f clahqr.f
DEPRECATED/clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
clanhb.f clanhe.f
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
clarf.f clarfb.f clarfg.f clarft.f clarfgp.f
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
DEPRECATED/clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
cposv.f cposvx.f cpstrf.f cpstf2.f
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
crot.f cspcon.f csprfs.f cspsv.f
cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
cstegr.f cstein.f csteqr.f
csycon.f
csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f
csyswapr.f csytrs.f csytrs2.f csyconv.f
csytf2_rook.f csytrf_rook.f csytrs_rook.f
csytri_rook.f csycon_rook.f csysv_rook.f
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
ctprfs.f ctptri.f
ctptrs.f ctrcon.f ctrevc.f ctrevc3.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
ctrsyl.f ctrtrs.f DEPRECATED/ctzrqf.f ctzrzf.f cung2l.f cung2r.f
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
cgelq.f cgelqt.f cgelqt3.f cgemlq.f cgemlqt.f cgemqr.f cgeqr.f cgetsls.f
clamswlq.f clamtsqr.f claswlq.f clatsqr.f ctplqt.f ctplqt2.f ctpmlqt.f
chesv_aa.f chetrf_aa.f chetrs_aa.f clahef_aa.f csytf2_rk.f clasyf_rk.f
csytrf_rk.f csytrs_3.f csycon_3.f csytri_3.f csytri_3x.f csysv_rk.f
chetf2_rk.f clahef_rk.f chetrf_rk.f chetrs_3.f checon_3.f chetri_3.f
chetri_3x.f chesv_rk.f chb2st_kernels.f chbev_2stage.f chbevd_2stage.f
chbevx_2stage.f cheev_2stage.f cheevd_2stage.f cheevr_2stage.f cheevx_2stage.f
chegv_2stage.f chetrd_2stage.f chetrd_hb2st.F chetrd_he2hb.f clarfy.f
)
set(ZCLASRC cpotrs.f)
set(SCATGEN slatm1.f slaran.f slarnd.f)
set(DLASRC
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
DEPRECATED/dgegs.f DEPRECATED/dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
dgels.f dgelsd.f dgelss.f DEPRECATED/dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
dgeqp3.f DEPRECATED/dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvdx.f dgesvx.f
dgetc2.f dgetri.f dgetrf2.f
dggbak.f dggbal.f dgges.f dgges3.f dggesx.f dggev.f dggev3.f dggevx.f
dggglm.f dgghd3.f dgghrd.f dgglse.f dggqrf.f
dggrqf.f dggsvd3.f dggsvp3.f DEPRECATED/dggsvd.f DEPRECATED/dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
DEPRECATED/dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
dlapll.f dlapmt.f
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
dlargv.f dlarrv.f dlartv.f
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f DEPRECATED/dlatzm.f
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
dorgrq.f dorgtr.f dorm2l.f dorm2r.f dorm22.f
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
dpbstf.f dpbsv.f dpbsvx.f
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
dposvx.f dpotrf2.f dpotrs.f dpstrf.f dpstf2.f
dppcon.f dppequ.f
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
dstevx.f
dsycon.f dsyev.f dsyevd.f dsyevr.f
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
dsysv.f dsysvx.f
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f
dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f
dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
dsytri_rook.f dsycon_rook.f dsysv_rook.f
dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
dtptrs.f
dtrcon.f dtrevc.f dtrevc3.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
dtrtrs.f DEPRECATED/dtzrqf.f dtzrzf.f dstemr.f
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f
dgeequb.f dsyequb.f dpoequb.f dgbequb.f
dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f dgetsls.f
dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f dtplqt2.f dtpmlqt.f
dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f dsytf2_rk.f dlasyf_rk.f
dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f dsytri_3x.f dsysv_rk.f
dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f dsbevx_2stage.f
dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f dsyevx_2stage.f
dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F dsytrd_sy2sb.f dlarfy.f
)
set(SMATGEN slatms.f slatme.f slatmr.f slatmt.f
slagge.f slagsy.f slakf2.f slarge.f slaror.f slarot.f slatm2.f
slatm3.f slatm5.f slatm6.f slatm7.f slahilb.f)
set(ZLASRC
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
DEPRECATED/zgegs.f DEPRECATED/zgegv.f zgehd2.f zgehrd.f zgejsv.f zgelq2.f zgelqf.f
zgels.f zgelsd.f zgelss.f DEPRECATED/zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
DEPRECATED/zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
zgesc2.f zgesdd.f zgesvd.f zgesvdx.f zgesvj.f zgesvx.f zgetc2.f
zgetri.f zgetrf2.f
zggbak.f zggbal.f zgges.f zgges3.f zggesx.f zggev.f zggev3.f zggevx.f zggglm.f
zgghd3.f zgghrd.f zgglse.f zggqrf.f zggrqf.f
DEPRECATED/zggsvd.f zggsvd3.f DEPRECATED/zggsvp.f zggsvp3.f
zgsvj0.f zgsvj1.f
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f
zhetf2.f zhetrd.f
zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
zhetrs.f zhetrs2.f
zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f
zhgeqz.f zhpcon.f zhpev.f zhpevd.f
zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
zhpsvx.f
zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
zlaed0.f zlaed7.f zlaed8.f
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
zlahef.f zlahef_rook.f zlahqr.f
DEPRECATED/zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
zlangt.f zlanhb.f
zlanhe.f
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
zlarcm.f zlarf.f zlarfb.f
zlarfg.f zlarft.f zlarfgp.f
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
zlassq.f zlasyf.f zlasyf_rook.f zlasyf_aa.f
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f DEPRECATED/zlatzm.f
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
zposv.f zposvx.f zpotrf2.f zpotrs.f zpstrf.f zpstf2.f
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
zrot.f zspcon.f zsprfs.f zspsv.f
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
zstegr.f zstein.f zsteqr.f
zsycon.f zsysv_aa.f
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
zsyswapr.f zsytrs.f zsytrs_aa.f zsytrs2.f zsyconv.f
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
zsytri_rook.f zsycon_rook.f zsysv_rook.f
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
ztprfs.f ztptri.f
ztptrs.f ztrcon.f ztrevc.f ztrevc3.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
ztrsyl.f ztrtrs.f DEPRECATED/ztzrqf.f ztzrzf.f zung2l.f
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunm22.f zunml2.f
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
zunmtr.f zupgtr.f
zupmtr.f izmax1.f dzsum1.f zstemr.f
zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
zgelq.f zgelqt.f zgelqt3.f zgemlq.f zgemlqt.f zgemqr.f zgeqr.f zgetsls.f
zlamswlq.f zlamtsqr.f zlaswlq.f zlatsqr.f ztplqt.f ztplqt2.f ztpmlqt.f
zhesv_aa.f zhetrf_aa.f zhetrs_aa.f zlahef_aa.f zsytf2_rk.f zlasyf_rk.f
zsytrf_aa.f zsytrf_rk.f zsytrs_3.f zsycon_3.f zsytri_3.f zsytri_3x.f zsysv_rk.f
zhetf2_rk.f zlahef_rk.f zhetrf_rk.f zhetrs_3.f zhecon_3.f zhetri_3.f
zhetri_3x.f zhesv_rk.f zhb2st_kernels.f zhbev_2stage.f zhbevd_2stage.f
zhbevx_2stage.f zheev_2stage.f zheevd_2stage.f zheevr_2stage.f
zheevx_2stage.f zhegv_2stage.f zhetrd_2stage.f zhetrd_hb2st.F zhetrd_he2hb.f
zlarfy.f
)
set(CMATGEN clatms.f clatme.f clatmr.f clatmt.f
clagge.f claghe.f clagsy.f clakf2.f clarge.f claror.f clarot.f
clatm1.f clarnd.f clatm2.f clatm3.f clatm5.f clatm6.f clahilb.f slatm7.f)
set(LA_REL_SRC ${ALLAUX})
if (BUILD_SINGLE)
list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX})
endif ()
set(DZATGEN dlatm1.f dlaran.f dlarnd.f)
if (BUILD_DOUBLE)
list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX})
endif ()
set(DMATGEN dlatms.f dlatme.f dlatmr.f dlatmt.f
dlagge.f dlagsy.f dlakf2.f dlarge.f dlaror.f dlarot.f dlatm2.f
dlatm3.f dlatm5.f dlatm6.f dlatm7.f dlahilb.f)
if (BUILD_COMPLEX)
list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX})
endif ()
set(ZMATGEN zlatms.f zlatme.f zlatmr.f zlatmt.f
zlagge.f zlaghe.f zlagsy.f zlakf2.f zlarge.f zlaror.f zlarot.f
zlatm1.f zlarnd.f zlatm2.f zlatm3.f zlatm5.f zlatm6.f zlahilb.f dlatm7.f)
if(BUILD_SINGLE)
set(LA_REL_SRC ${SLASRC} ${DSLASRC} ${ALLAUX} ${SCLAUX})
set(LA_GEN_SRC ${SMATGEN} ${SCATGEN})
message(STATUS "Building Single Precision")
endif()
if(BUILD_DOUBLE)
set(LA_REL_SRC ${LA_REL_SRC} ${DLASRC} ${DSLASRC} ${ALLAUX} ${DZLAUX})
set(LA_GEN_SRC ${LA_GEN_SRC} ${DMATGEN} ${DZATGEN})
message(STATUS "Building Double Precision")
endif()
if(BUILD_COMPLEX)
set(LA_REL_SRC ${LA_REL_SRC} ${CLASRC} ${ZCLASRC} ${ALLAUX} ${SCLAUX})
SET(LA_GEN_SRC ${LA_GEN_SRC} ${CMATGEN} ${SCATGEN})
message(STATUS "Building Complex Precision")
endif()
if(BUILD_COMPLEX16)
set(LA_REL_SRC ${LA_REL_SRC} ${ZLASRC} ${ZCLASRC} ${ALLAUX} ${DZLAUX})
SET(LA_GEN_SRC ${LA_GEN_SRC} ${ZMATGEN} ${DZATGEN})
message(STATUS "Building Double Complex Precision")
endif()
if (BUILD_COMPLEX16)
list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX})
endif ()
# add lapack-netlib folder to the sources
set(LA_SOURCES "")
foreach (LA_FILE ${LA_REL_SRC})
list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}")
endforeach ()
foreach (LA_FILE ${LA_GEN_SRC})
list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/TESTING/MATGEN/${LA_FILE}")
endforeach ()
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")

View File

@@ -1,5 +1,5 @@
set(CSRC
set(C_SRC
lapacke_cbbcsd.c
lapacke_cbbcsd_work.c
lapacke_cbdsqr.c
@@ -46,8 +46,6 @@ set(CSRC
lapacke_cgehrd_work.c
lapacke_cgejsv.c
lapacke_cgejsv_work.c
lapacke_cgelq.c
lapacke_cgelq_work.c
lapacke_cgelq2.c
lapacke_cgelq2_work.c
lapacke_cgelqf.c
@@ -62,18 +60,12 @@ set(CSRC
lapacke_cgelsy_work.c
lapacke_cgemqr.c
lapacke_cgemqr_work.c
lapacke_cgemlq.c
lapacke_cgemlq_work.c
lapacke_cgemqrt.c
lapacke_cgemqrt_work.c
lapacke_cgeqlf.c
lapacke_cgeqlf_work.c
lapacke_cgeqp3.c
lapacke_cgeqp3_work.c
lapacke_cgeqpf.c
lapacke_cgeqpf_work.c
lapacke_cgeqr.c
lapacke_cgeqr_work.c
lapacke_cgeqr2.c
lapacke_cgeqr2_work.c
lapacke_cgeqrf.c
@@ -142,12 +134,8 @@ set(CSRC
lapacke_cggqrf_work.c
lapacke_cggrqf.c
lapacke_cggrqf_work.c
lapacke_cggsvd.c
lapacke_cggsvd_work.c
lapacke_cggsvd3.c
lapacke_cggsvd3_work.c
lapacke_cggsvp.c
lapacke_cggsvp_work.c
lapacke_cggsvp3.c
lapacke_cggsvp3_work.c
lapacke_cgtcon.c
@@ -222,8 +210,6 @@ set(CSRC
lapacke_chesv_work.c
lapacke_chesv_aa.c
lapacke_chesv_aa_work.c
lapacke_chesv_aa_2stage.c
lapacke_chesv_aa_2stage_work.c
lapacke_chesv_rk.c
lapacke_chesv_rk_work.c
lapacke_chesvx.c
@@ -238,8 +224,6 @@ set(CSRC
lapacke_chetrf_rook_work.c
lapacke_chetrf_aa.c
lapacke_chetrf_aa_work.c
lapacke_chetrf_aa_2stage.c
lapacke_chetrf_aa_2stage_work.c
lapacke_chetrf_rk.c
lapacke_chetrf_rk_work.c
lapacke_chetri.c
@@ -258,9 +242,6 @@ set(CSRC
lapacke_chetrs_rook_work.c
lapacke_chetrs_aa.c
lapacke_chetrs_aa_work.c
lapacke_chetrs_aa_2stage.c
lapacke_chetrs_aa_2stage_work.c
lapacke_chetrf_rk.c
lapacke_chetrs_3.c
lapacke_chetrs_3_work.c
lapacke_chfrk.c
@@ -309,11 +290,6 @@ set(CSRC
lapacke_clacp2_work.c
lapacke_clacpy.c
lapacke_clacpy_work.c
lapacke_clacrm.c
lapacke_clacrm_work.c
lapacke_clarcm.c
lapacke_clarcm_work.c
lapacke_clacn2.c
lapacke_clag2z.c
lapacke_clag2z_work.c
lapacke_clange.c
@@ -342,8 +318,6 @@ set(CSRC
lapacke_clascl_work.c
lapacke_claset.c
lapacke_claset_work.c
lapacke_classq.c
lapacke_classq_work.c
lapacke_claswp.c
lapacke_claswp_work.c
lapacke_clauum.c
@@ -462,8 +436,6 @@ set(CSRC
lapacke_csysv_work.c
lapacke_csysv_aa.c
lapacke_csysv_aa_work.c
lapacke_csysv_aa_2stage.c
lapacke_csysv_aa_2stage_work.c
lapacke_csysv_rk.c
lapacke_csysv_rk_work.c
lapacke_csysvx.c
@@ -476,8 +448,6 @@ set(CSRC
lapacke_csytrf_rook_work.c
lapacke_csytrf_aa.c
lapacke_csytrf_aa_work.c
lapacke_csytrf_aa_2stage.c
lapacke_csytrf_aa_2stage_work.c
lapacke_csytrf_rk.c
lapacke_csytrf_rk_work.c
lapacke_csytri.c
@@ -496,8 +466,6 @@ set(CSRC
lapacke_csytrs_rook_work.c
lapacke_csytrs_aa.c
lapacke_csytrs_aa_work.c
lapacke_csytrs_aa_2stage.c
lapacke_csytrs_aa_2stage_work.c
lapacke_csytrs_3.c
lapacke_csytrs_3_work.c
lapacke_ctbcon.c
@@ -665,8 +633,6 @@ set(DSRC
lapacke_dgehrd_work.c
lapacke_dgejsv.c
lapacke_dgejsv_work.c
lapacke_dgelq.c
lapacke_dgelq_work.c
lapacke_dgelq2.c
lapacke_dgelq2_work.c
lapacke_dgelqf.c
@@ -679,8 +645,6 @@ set(DSRC
lapacke_dgelss_work.c
lapacke_dgelsy.c
lapacke_dgelsy_work.c
lapacke_dgemlq.c
lapacke_dgemlq_work.c
lapacke_dgemqr.c
lapacke_dgemqr_work.c
lapacke_dgemqrt.c
@@ -689,10 +653,6 @@ set(DSRC
lapacke_dgeqlf_work.c
lapacke_dgeqp3.c
lapacke_dgeqp3_work.c
lapacke_dgeqpf.c
lapacke_dgeqpf_work.c
lapacke_dgeqr.c
lapacke_dgeqr_work.c
lapacke_dgeqr2.c
lapacke_dgeqr2_work.c
lapacke_dgeqrf.c
@@ -761,12 +721,8 @@ set(DSRC
lapacke_dggqrf_work.c
lapacke_dggrqf.c
lapacke_dggrqf_work.c
lapacke_dggsvd.c
lapacke_dggsvd_work.c
lapacke_dggsvd3.c
lapacke_dggsvd3_work.c
lapacke_dggsvp.c
lapacke_dggsvp_work.c
lapacke_dggsvp3.c
lapacke_dggsvp3_work.c
lapacke_dgtcon.c
@@ -829,9 +785,6 @@ set(DSRC
lapacke_dlaset_work.c
lapacke_dlasrt.c
lapacke_dlasrt_work.c
lapacke_dlassq.c
lapacke_dlassq_work.c
lapacke_dlaswp.c
lapacke_dlaswp.c
lapacke_dlaswp_work.c
lapacke_dlauum.c
@@ -1072,8 +1025,6 @@ set(DSRC
lapacke_dsysv_work.c
lapacke_dsysv_aa.c
lapacke_dsysv_aa_work.c
lapacke_dsysv_aa_2stage.c
lapacke_dsysv_aa_2stage_work.c
lapacke_dsysv_rk.c
lapacke_dsysv_rk_work.c
lapacke_dsysvx.c
@@ -1088,8 +1039,6 @@ set(DSRC
lapacke_dsytrf_rook_work.c
lapacke_dsytrf_aa.c
lapacke_dsytrf_aa_work.c
lapacke_dsytrf_aa_2stage.c
lapacke_dsytrf_aa_2stage_work.c
lapacke_dsytrf_rk.c
lapacke_dsytrf_rk_work.c
lapacke_dsytri.c
@@ -1106,8 +1055,6 @@ set(DSRC
lapacke_dsytrs2_work.c
lapacke_dsytrs_aa.c
lapacke_dsytrs_aa_work.c
lapacke_dsytrs_aa_2stage.c
lapacke_dsytrs_aa_2stage_work.c
lapacke_dsytrs_3.c
lapacke_dsytrs_3_work.c
lapacke_dsytrs_work.c
@@ -1237,8 +1184,6 @@ set(SSRC
lapacke_sgehrd_work.c
lapacke_sgejsv.c
lapacke_sgejsv_work.c
lapacke_sgelq.c
lapacke_sgelq_work.c
lapacke_sgelq2.c
lapacke_sgelq2_work.c
lapacke_sgelqf.c
@@ -1251,8 +1196,6 @@ set(SSRC
lapacke_sgelss_work.c
lapacke_sgelsy.c
lapacke_sgelsy_work.c
lapacke_sgemlq.c
lapacke_sgemlq_work.c
lapacke_sgemqr.c
lapacke_sgemqr_work.c
lapacke_sgemqrt.c
@@ -1261,10 +1204,6 @@ set(SSRC
lapacke_sgeqlf_work.c
lapacke_sgeqp3.c
lapacke_sgeqp3_work.c
lapacke_sgeqpf.c
lapacke_sgeqpf_work.c
lapacke_sgeqr.c
lapacke_sgeqr_work.c
lapacke_sgeqr2.c
lapacke_sgeqr2_work.c
lapacke_sgeqrf.c
@@ -1333,12 +1272,8 @@ set(SSRC
lapacke_sggqrf_work.c
lapacke_sggrqf.c
lapacke_sggrqf_work.c
lapacke_sggsvd.c
lapacke_sggsvd_work.c
lapacke_sggsvd3.c
lapacke_sggsvd3_work.c
lapacke_sggsvp.c
lapacke_sggsvp_work.c
lapacke_sggsvp3.c
lapacke_sggsvp3_work.c
lapacke_sgtcon.c
@@ -1401,8 +1336,6 @@ set(SSRC
lapacke_slaset_work.c
lapacke_slasrt.c
lapacke_slasrt_work.c
lapacke_slassq.c
lapacke_slassq_work.c
lapacke_slaswp.c
lapacke_slaswp_work.c
lapacke_slauum.c
@@ -1639,8 +1572,6 @@ set(SSRC
lapacke_ssysv_work.c
lapacke_ssysv_aa.c
lapacke_ssysv_aa_work.c
lapacke_ssysv_aa_2stage.c
lapacke_ssysv_aa_2stage_work.c
lapacke_ssysv_rk.c
lapacke_ssysv_rk_work.c
lapacke_ssysvx.c
@@ -1655,9 +1586,6 @@ set(SSRC
lapacke_ssytrf_rook_work.c
lapacke_ssytrf_aa.c
lapacke_ssytrf_aa_work.c
lapacke_ssytrf_aa_2stage.c
lapacke_ssytrf_aa_2stage_work.c
lapacke_ssytrf_rook.c
lapacke_ssytrf_rk.c
lapacke_ssytrf_rk_work.c
lapacke_ssytri.c
@@ -1674,8 +1602,6 @@ set(SSRC
lapacke_ssytrs2_work.c
lapacke_ssytrs_aa.c
lapacke_ssytrs_aa_work.c
lapacke_ssytrs_aa_2stage.c
lapacke_ssytrs_aa_2stage_work.c
lapacke_ssytrs_3.c
lapacke_ssytrs_3_work.c
lapacke_ssytrs_work.c
@@ -1803,8 +1729,6 @@ set(ZSRC
lapacke_zgehrd_work.c
lapacke_zgejsv.c
lapacke_zgejsv_work.c
lapacke_zgelq.c
lapacke_zgelq_work.c
lapacke_zgelq2.c
lapacke_zgelq2_work.c
lapacke_zgelqf.c
@@ -1817,8 +1741,6 @@ set(ZSRC
lapacke_zgelss_work.c
lapacke_zgelsy.c
lapacke_zgelsy_work.c
lapacke_zgemlq.c
lapacke_zgemlq_work.c
lapacke_zgemqr.c
lapacke_zgemqr_work.c
lapacke_zgemqrt.c
@@ -1827,10 +1749,6 @@ set(ZSRC
lapacke_zgeqlf_work.c
lapacke_zgeqp3.c
lapacke_zgeqp3_work.c
lapacke_zgeqpf.c
lapacke_zgeqpf_work.c
lapacke_zgeqr.c
lapacke_zgeqr_work.c
lapacke_zgeqr2.c
lapacke_zgeqr2_work.c
lapacke_zgeqrf.c
@@ -1899,12 +1817,8 @@ set(ZSRC
lapacke_zggqrf_work.c
lapacke_zggrqf.c
lapacke_zggrqf_work.c
lapacke_zggsvd.c
lapacke_zggsvd_work.c
lapacke_zggsvd3.c
lapacke_zggsvd3_work.c
lapacke_zggsvp.c
lapacke_zggsvp_work.c
lapacke_zggsvp3.c
lapacke_zggsvp3_work.c
lapacke_zgtcon.c
@@ -1925,12 +1839,6 @@ set(ZSRC
lapacke_zhbevd_work.c
lapacke_zhbevx.c
lapacke_zhbevx_work.c
lapacke_zhbev_2stage.c
lapacke_zhbev_2stage_work.c
lapacke_zhbevd_2stage.c
lapacke_zhbevd_2stage_work.c
lapacke_zhbevx_2stage.c
lapacke_zhbevx_2stage_work.c
lapacke_zhbgst.c
lapacke_zhbgst_work.c
lapacke_zhbgv.c
@@ -1979,8 +1887,6 @@ set(ZSRC
lapacke_zhesv_work.c
lapacke_zhesv_aa.c
lapacke_zhesv_aa_work.c
lapacke_zhesv_aa_2stage.c
lapacke_zhesv_aa_2stage_work.c
lapacke_zhesv_rk.c
lapacke_zhesv_rk_work.c
lapacke_zhesvx.c
@@ -1995,8 +1901,6 @@ set(ZSRC
lapacke_zhetrf_rook_work.c
lapacke_zhetrf_aa.c
lapacke_zhetrf_aa_work.c
lapacke_zhetrf_aa_2stage.c
lapacke_zhetrf_aa_2stage_work.c
lapacke_zhetrf_rk.c
lapacke_zhetrf_rk_work.c
lapacke_zhetri.c
@@ -2014,8 +1918,6 @@ set(ZSRC
lapacke_zhetrs_work.c
lapacke_zhetrs_aa.c
lapacke_zhetrs_aa_work.c
lapacke_zhetrs_aa_2stage.c
lapacke_zhetrs_aa_2stage_work.c
lapacke_zhetrs_3.c
lapacke_zhetrs_3_work.c
lapacke_zhetrs_rook_work.c
@@ -2065,8 +1967,6 @@ set(ZSRC
lapacke_zlacp2_work.c
lapacke_zlacpy.c
lapacke_zlacpy_work.c
lapacke_zlacrm.c
lapacke_zlacrm_work.c
lapacke_zlag2c.c
lapacke_zlag2c_work.c
lapacke_zlange.c
@@ -2081,8 +1981,6 @@ set(ZSRC
lapacke_zlapmr_work.c
lapacke_zlapmt.c
lapacke_zlapmt_work.c
lapacke_zlarcm.c
lapacke_zlarcm_work.c
lapacke_zlarfb.c
lapacke_zlarfb_work.c
lapacke_zlarfg.c
@@ -2097,8 +1995,6 @@ set(ZSRC
lapacke_zlascl_work.c
lapacke_zlaset.c
lapacke_zlaset_work.c
lapacke_zlassq.c
lapacke_zlassq_work.c
lapacke_zlaswp.c
lapacke_zlaswp_work.c
lapacke_zlauum.c
@@ -2217,8 +2113,6 @@ set(ZSRC
lapacke_zsysv_work.c
lapacke_zsysv_aa.c
lapacke_zsysv_aa_work.c
lapacke_zsysv_aa_2stage.c
lapacke_zsysv_aa_2stage_work.c
lapacke_zsysv_rk.c
lapacke_zsysv_rk_work.c
lapacke_zsysvx.c
@@ -2231,8 +2125,6 @@ set(ZSRC
lapacke_zsytrf_rook_work.c
lapacke_zsytrf_aa.c
lapacke_zsytrf_aa_work.c
lapacke_zsytrf_aa_2stage.c
lapacke_zsytrf_aa_2stage_work.c
lapacke_zsytrf_rk.c
lapacke_zsytrf_rk_work.c
lapacke_zsytri.c
@@ -2251,8 +2143,6 @@ set(ZSRC
lapacke_zsytrs_rook_work.c
lapacke_zsytrs_aa.c
lapacke_zsytrs_aa_work.c
lapacke_zsytrs_aa_2stage.c
lapacke_zsytrs_aa_2stage_work.c
lapacke_zsytrs_3.c
lapacke_zsytrs_3_work.c
lapacke_ztbcon.c
@@ -2373,92 +2263,104 @@ set(ZSRC
)
set(SRCX
lapacke_cgbrfsx.c lapacke_cporfsx.c lapacke_dgerfsx.c lapacke_sgbrfsx.c lapacke_ssyrfsx.c lapacke_zherfsx.c
lapacke_cgbrfsx_work.c lapacke_cporfsx_work.c lapacke_dgerfsx_work.c lapacke_sgbrfsx_work.c lapacke_ssyrfsx_work.c lapacke_zherfsx_work.c
lapacke_cgerfsx.c lapacke_csyrfsx.c lapacke_dporfsx.c lapacke_sgerfsx.c lapacke_zgbrfsx.c lapacke_zporfsx.c
lapacke_cgerfsx_work.c lapacke_csyrfsx_work.c lapacke_dporfsx_work.c lapacke_sgerfsx_work.c lapacke_zgbrfsx_work.c lapacke_zporfsx_work.c
lapacke_cherfsx.c lapacke_dgbrfsx.c lapacke_dsyrfsx.c lapacke_sporfsx.c lapacke_zgerfsx.c lapacke_zsyrfsx.c
lapacke_cherfsx_work.c lapacke_dgbrfsx_work.c lapacke_dsyrfsx_work.c lapacke_sporfsx_work.c lapacke_zgerfsx_work.c lapacke_zsyrfsx_work.c
lapacke_cgbsvxx.c lapacke_cposvxx.c lapacke_dgesvxx.c lapacke_sgbsvxx.c lapacke_ssysvxx.c lapacke_zhesvxx.c
lapacke_cgbsvxx_work.c lapacke_cposvxx_work.c lapacke_dgesvxx_work.c lapacke_sgbsvxx_work.c lapacke_ssysvxx_work.c lapacke_zhesvxx_work.c
lapacke_cgesvxx.c lapacke_csysvxx.c lapacke_dposvxx.c lapacke_sgesvxx.c lapacke_zgbsvxx.c lapacke_zposvxx.c
lapacke_cgesvxx_work.c lapacke_csysvxx_work.c lapacke_dposvxx_work.c lapacke_sgesvxx_work.c lapacke_zgbsvxx_work.c lapacke_zposvxx_work.c
lapacke_chesvxx.c lapacke_dgbsvxx.c lapacke_dsysvxx.c lapacke_sposvxx.c lapacke_zgesvxx.c lapacke_zsysvxx.c
lapacke_cgbrfsx.c lapacke_cporfsx.c lapacke_dgerfsx.c lapacke_sgbrfsx.c lapacke_ssyrfsx.c lapacke_zherfsx.c
lapacke_cgbrfsx_work.c lapacke_cporfsx_work.c lapacke_dgerfsx_work.c lapacke_sgbrfsx_work.c lapacke_ssyrfsx_work.c lapacke_zherfsx_work.c
lapacke_cgerfsx.c lapacke_csyrfsx.c lapacke_dporfsx.c lapacke_sgerfsx.c lapacke_zgbrfsx.c lapacke_zporfsx.c
lapacke_cgerfsx_work.c lapacke_csyrfsx_work.c lapacke_dporfsx_work.c lapacke_sgerfsx_work.c lapacke_zgbrfsx_work.c lapacke_zporfsx_work.c
lapacke_cherfsx.c lapacke_dgbrfsx.c lapacke_dsyrfsx.c lapacke_sporfsx.c lapacke_zgerfsx.c lapacke_zsyrfsx.c
lapacke_cherfsx_work.c lapacke_dgbrfsx_work.c lapacke_dsyrfsx_work.c lapacke_sporfsx_work.c lapacke_zgerfsx_work.c lapacke_zsyrfsx_work.c
lapacke_cgbsvxx.c lapacke_cposvxx.c lapacke_dgesvxx.c lapacke_sgbsvxx.c lapacke_ssysvxx.c lapacke_zhesvxx.c
lapacke_cgbsvxx_work.c lapacke_cposvxx_work.c lapacke_dgesvxx_work.c lapacke_sgbsvxx_work.c lapacke_ssysvxx_work.c lapacke_zhesvxx_work.c
lapacke_cgesvxx.c lapacke_csysvxx.c lapacke_dposvxx.c lapacke_sgesvxx.c lapacke_zgbsvxx.c lapacke_zposvxx.c
lapacke_cgesvxx_work.c lapacke_csysvxx_work.c lapacke_dposvxx_work.c lapacke_sgesvxx_work.c lapacke_zgbsvxx_work.c lapacke_zposvxx_work.c
lapacke_chesvxx.c lapacke_dgbsvxx.c lapacke_dsysvxx.c lapacke_sposvxx.c lapacke_zgesvxx.c lapacke_zsysvxx.c
lapacke_chesvxx_work.c lapacke_dgbsvxx_work.c lapacke_dsysvxx_work.c lapacke_sposvxx_work.c lapacke_zgesvxx_work.c lapacke_zsysvxx_work.c
)
# FILE PARTS OF TMGLIB
# FILE PARTS OF TMGLIB
set(MATGEN
lapacke_clatms.c
lapacke_clatms_work.c
lapacke_dlatms.c
lapacke_dlatms_work.c
lapacke_slatms.c
lapacke_slatms_work.c
lapacke_zlatms.c
lapacke_zlatms_work.c
lapacke_clagge.c
lapacke_clagge_work.c
lapacke_dlagge.c
lapacke_dlagge_work.c
lapacke_slagge.c
lapacke_slagge_work.c
lapacke_zlagge.c
lapacke_zlagge_work.c
lapacke_claghe.c
lapacke_claghe_work.c
lapacke_zlaghe.c
lapacke_zlaghe_work.c
lapacke_clagsy.c
lapacke_clagsy_work.c
lapacke_dlagsy.c
lapacke_dlagsy_work.c
lapacke_slagsy.c
lapacke_slagsy_work.c
lapacke_zlagsy.c
lapacke_clatms.c
lapacke_clatms_work.c
lapacke_dlatms.c
lapacke_dlatms_work.c
lapacke_slatms.c
lapacke_slatms_work.c
lapacke_zlatms.c
lapacke_zlatms_work.c
lapacke_clagge.c
lapacke_clagge_work.c
lapacke_dlagge.c
lapacke_dlagge_work.c
lapacke_slagge.c
lapacke_slagge_work.c
lapacke_zlagge.c
lapacke_zlagge_work.c
lapacke_claghe.c
lapacke_claghe_work.c
lapacke_zlaghe.c
lapacke_zlaghe_work.c
lapacke_clagsy.c
lapacke_clagsy_work.c
lapacke_dlagsy.c
lapacke_dlagsy_work.c
lapacke_slagsy.c
lapacke_slagsy_work.c
lapacke_zlagsy.c
lapacke_zlagsy_work.c
lapacke_nancheck.c
)
set(Utils_SRC
lapacke_c_nancheck.c lapacke_ctr_trans.c lapacke_make_complex_float.c lapacke_zgb_nancheck.c
lapacke_cgb_nancheck.c lapacke_d_nancheck.c lapacke_s_nancheck.c lapacke_zgb_trans.c
lapacke_cgb_trans.c lapacke_dgb_nancheck.c lapacke_sgb_nancheck.c lapacke_zge_nancheck.c
lapacke_cge_nancheck.c lapacke_dgb_trans.c lapacke_sgb_trans.c lapacke_zge_trans.c
lapacke_cge_trans.c lapacke_dge_nancheck.c lapacke_sge_nancheck.c lapacke_zgg_nancheck.c
lapacke_cgg_nancheck.c lapacke_dge_trans.c lapacke_sge_trans.c lapacke_zgg_trans.c
lapacke_cgg_trans.c lapacke_dgg_nancheck.c lapacke_sgg_nancheck.c lapacke_zgt_nancheck.c
lapacke_cgt_nancheck.c lapacke_dgg_trans.c lapacke_sgg_trans.c lapacke_zhb_nancheck.c
lapacke_chb_nancheck.c lapacke_dgt_nancheck.c lapacke_sgt_nancheck.c lapacke_zhb_trans.c
lapacke_chb_trans.c lapacke_dhs_nancheck.c lapacke_shs_nancheck.c lapacke_zhe_nancheck.c
lapacke_che_nancheck.c lapacke_dhs_trans.c lapacke_shs_trans.c lapacke_zhe_trans.c
lapacke_che_trans.c lapacke_dpb_nancheck.c lapacke_spb_nancheck.c lapacke_zhp_nancheck.c
lapacke_chp_nancheck.c lapacke_dpb_trans.c lapacke_spb_trans.c lapacke_zhp_trans.c
lapacke_chp_trans.c lapacke_dpf_nancheck.c lapacke_spf_nancheck.c lapacke_zhs_nancheck.c
lapacke_chs_nancheck.c lapacke_dpf_trans.c lapacke_spf_trans.c lapacke_zhs_trans.c
lapacke_chs_trans.c lapacke_dpo_nancheck.c lapacke_spo_nancheck.c lapacke_zpb_nancheck.c
lapacke_cpb_nancheck.c lapacke_dpo_trans.c lapacke_spo_trans.c lapacke_zpb_trans.c
lapacke_cpb_trans.c lapacke_dpp_nancheck.c lapacke_spp_nancheck.c lapacke_zpf_nancheck.c
lapacke_cpf_nancheck.c lapacke_dpp_trans.c lapacke_spp_trans.c lapacke_zpf_trans.c
lapacke_cpf_trans.c lapacke_dpt_nancheck.c lapacke_spt_nancheck.c lapacke_zpo_nancheck.c
lapacke_cpo_nancheck.c lapacke_dsb_nancheck.c lapacke_ssb_nancheck.c lapacke_zpo_trans.c
lapacke_cpo_trans.c lapacke_dsb_trans.c lapacke_ssb_trans.c lapacke_zpp_nancheck.c
lapacke_cpp_nancheck.c lapacke_dsp_nancheck.c lapacke_ssp_nancheck.c lapacke_zpp_trans.c
lapacke_cpp_trans.c lapacke_dsp_trans.c lapacke_ssp_trans.c lapacke_zpt_nancheck.c
lapacke_cpt_nancheck.c lapacke_dst_nancheck.c lapacke_sst_nancheck.c lapacke_zsp_nancheck.c
lapacke_csp_nancheck.c lapacke_dsy_nancheck.c lapacke_ssy_nancheck.c lapacke_zsp_trans.c
lapacke_csp_trans.c lapacke_dsy_trans.c lapacke_ssy_trans.c lapacke_zst_nancheck.c
lapacke_cst_nancheck.c lapacke_dtb_nancheck.c lapacke_stb_nancheck.c lapacke_zsy_nancheck.c
lapacke_csy_nancheck.c lapacke_dtb_trans.c lapacke_stb_trans.c lapacke_zsy_trans.c
lapacke_csy_trans.c lapacke_dtf_nancheck.c lapacke_stf_nancheck.c lapacke_ztb_nancheck.c
lapacke_ctb_nancheck.c lapacke_dtf_trans.c lapacke_stf_trans.c lapacke_ztb_trans.c
lapacke_ctb_trans.c lapacke_dtp_nancheck.c lapacke_stp_nancheck.c lapacke_ztf_nancheck.c
lapacke_ctf_nancheck.c lapacke_dtp_trans.c lapacke_stp_trans.c lapacke_ztf_trans.c
lapacke_ctf_trans.c lapacke_dtr_nancheck.c lapacke_str_nancheck.c lapacke_ztp_nancheck.c
lapacke_ctp_nancheck.c lapacke_dtr_trans.c lapacke_str_trans.c lapacke_ztp_trans.c
lapacke_ctp_trans.c lapacke_lsame.c lapacke_xerbla.c lapacke_ztr_nancheck.c
lapacke_ctr_nancheck.c lapacke_make_complex_double.c lapacke_z_nancheck.c lapacke_ztr_trans.c
lapacke_cgb_nancheck.c lapacke_dpf_nancheck.c lapacke_ssy_trans.c
lapacke_cgb_trans.c lapacke_dpf_trans.c lapacke_stb_nancheck.c
lapacke_cge_nancheck.c lapacke_dpo_nancheck.c lapacke_stb_trans.c
lapacke_cge_trans.c lapacke_dpo_trans.c lapacke_stf_nancheck.c
lapacke_cgg_nancheck.c lapacke_dpp_nancheck.c lapacke_stf_trans.c
lapacke_cgg_trans.c lapacke_dpp_trans.c lapacke_stp_nancheck.c
lapacke_cgt_nancheck.c lapacke_dpt_nancheck.c lapacke_stp_trans.c
lapacke_chb_nancheck.c lapacke_dsb_nancheck.c lapacke_str_nancheck.c
lapacke_chb_trans.c lapacke_dsb_trans.c lapacke_str_trans.c
lapacke_che_nancheck.c lapacke_dsp_nancheck.c lapacke_xerbla.c
lapacke_che_trans.c lapacke_dsp_trans.c lapacke_zgb_nancheck.c
lapacke_chp_nancheck.c lapacke_dst_nancheck.c lapacke_zgb_trans.c
lapacke_chp_trans.c lapacke_dsy_nancheck.c lapacke_zge_nancheck.c
lapacke_chs_nancheck.c lapacke_dsy_trans.c lapacke_zge_trans.c
lapacke_chs_trans.c lapacke_dtb_nancheck.c lapacke_zgg_nancheck.c
lapacke_c_nancheck.c lapacke_dtb_trans.c lapacke_zgg_trans.c
lapacke_cpb_nancheck.c lapacke_dtf_nancheck.c lapacke_zgt_nancheck.c
lapacke_cpb_trans.c lapacke_dtf_trans.c lapacke_zhb_nancheck.c
lapacke_cpf_nancheck.c lapacke_dtp_nancheck.c lapacke_zhb_trans.c
lapacke_cpf_trans.c lapacke_dtp_trans.c lapacke_zhe_nancheck.c
lapacke_cpo_nancheck.c lapacke_dtr_nancheck.c lapacke_zhe_trans.c
lapacke_cpo_trans.c lapacke_dtr_trans.c lapacke_zhp_nancheck.c
lapacke_cpp_nancheck.c lapacke_lsame.c lapacke_zhp_trans.c
lapacke_cpp_trans.c lapacke_make_complex_double.c lapacke_zhs_nancheck.c
lapacke_cpt_nancheck.c lapacke_make_complex_float.c lapacke_zhs_trans.c
lapacke_csp_nancheck.c lapacke_sgb_nancheck.c lapacke_z_nancheck.c
lapacke_csp_trans.c lapacke_sgb_trans.c lapacke_zpb_nancheck.c
lapacke_cst_nancheck.c lapacke_sge_nancheck.c lapacke_zpb_trans.c
lapacke_csy_nancheck.c lapacke_sge_trans.c lapacke_zpf_nancheck.c
lapacke_csy_trans.c lapacke_sgg_nancheck.c lapacke_zpf_trans.c
lapacke_ctb_nancheck.c lapacke_sgg_trans.c lapacke_zpo_nancheck.c
lapacke_ctb_trans.c lapacke_sgt_nancheck.c lapacke_zpo_trans.c
lapacke_ctf_nancheck.c lapacke_shs_nancheck.c lapacke_zpp_nancheck.c
lapacke_ctf_trans.c lapacke_shs_trans.c lapacke_zpp_trans.c
lapacke_ctp_nancheck.c lapacke_s_nancheck.c lapacke_zpt_nancheck.c
lapacke_ctp_trans.c lapacke_spb_nancheck.c lapacke_zsp_nancheck.c
lapacke_ctr_nancheck.c lapacke_spb_trans.c lapacke_zsp_trans.c
lapacke_ctr_trans.c lapacke_spf_nancheck.c lapacke_zst_nancheck.c
lapacke_dgb_nancheck.c lapacke_spf_trans.c lapacke_zsy_nancheck.c
lapacke_dgb_trans.c lapacke_spo_nancheck.c lapacke_zsy_trans.c
lapacke_dge_nancheck.c lapacke_spo_trans.c lapacke_ztb_nancheck.c
lapacke_dge_trans.c lapacke_spp_nancheck.c lapacke_ztb_trans.c
lapacke_dgg_nancheck.c lapacke_spp_trans.c lapacke_ztf_nancheck.c
lapacke_dgg_trans.c lapacke_spt_nancheck.c lapacke_ztf_trans.c
lapacke_dgt_nancheck.c lapacke_ssb_nancheck.c lapacke_ztp_nancheck.c
lapacke_dhs_nancheck.c lapacke_ssb_trans.c lapacke_ztp_trans.c
lapacke_dhs_trans.c lapacke_ssp_nancheck.c lapacke_ztr_nancheck.c
lapacke_d_nancheck.c lapacke_ssp_trans.c lapacke_ztr_trans.c
lapacke_dpb_nancheck.c lapacke_sst_nancheck.c
lapacke_dpb_trans.c lapacke_ssy_nancheck.c
)
set(LAPACKE_REL_SRC "")
@@ -2478,10 +2380,6 @@ if (BUILD_COMPLEX16)
list(APPEND LAPACKE_REL_SRC ${ZSRC})
endif ()
if (BUILD_MATGEN)
list(APPEND LAPACKE_REL_SRC ${MATGEN})
endif ()
# add lapack-netlib folder to the sources
set(LAPACKE_SOURCES "")
foreach (LAE_FILE ${LAPACKE_REL_SRC})
@@ -2493,6 +2391,6 @@ foreach (Utils_FILE ${Utils_SRC})
endforeach ()
set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/LAPACKE/include")
configure_file("${lapacke_include_dir}/lapacke_mangling_with_flags.h.in" "${lapacke_include_dir}/lapacke_mangling.h" COPYONLY)
execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${lapacke_include_dir}/lapacke_mangling_with_flags.h.in" "${lapacke_include_dir}/lapacke_mangling.h")
include_directories(${lapacke_include_dir})
set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}")

View File

@@ -1,11 +1,9 @@
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
libsuffix=@SUFFIX64_UNDERSCORE@
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
openblas_config=USE_64BITINT=@USE_64BITINT@ NO_CBLAS=@NO_CBLAS@ NO_LAPACK=@NO_LAPACK@ NO_LAPACKE=@NO_LAPACKE@ DYNAMIC_ARCH=@DYNAMIC_ARCH@ DYNAMIC_OLDER=@DYNAMIC_OLDER@ NO_AFFINITY=@NO_AFFINITY@ USE_OPENMP=@USE_OPENMP@ @CORE@ MAX_THREADS=@NUM_THREADS@
Name: OpenBLAS
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
Version: @OPENBLAS_VERSION@
URL: https://github.com/xianyi/OpenBLAS
Libs: -L${libdir} -lopenblas${libsuffix}
Libs: -L${libdir} -lopenblas
Cflags: -I${includedir}

View File

@@ -3,6 +3,19 @@
## Description: Ported from portion of OpenBLAS/Makefile.system
## Detects the OS and sets appropriate variables.
if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var
set(MD5SUM "md5 -r")
endif ()
if (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
set(MD5SUM "md5 -r")
endif ()
if (${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD")
set(MD5SUM "md5 -n")
endif ()
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
set(EXTRALIB "${EXTRALIB} -lm")
set(NO_EXPRECISION 1)
@@ -43,7 +56,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
# Ensure the correct stack alignment on Win32
# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
if (X86)
if (${ARCH} STREQUAL "x86")
if (NOT MSVC AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2")
endif ()
@@ -65,7 +78,7 @@ if (CYGWIN)
endif ()
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Android")
if (USE_THREAD)
if (SMP)
set(EXTRALIB "${EXTRALIB} -lpthread")
endif ()
endif ()
@@ -75,7 +88,7 @@ if (QUAD_PRECISION)
set(NO_EXPRECISION 1)
endif ()
if (X86)
if (${ARCH} STREQUAL "x86")
set(NO_EXPRECISION 1)
endif ()

View File

@@ -37,314 +37,112 @@
# CPUIDEMU = ../../cpuid/table.o
if (DEFINED CPUIDEMU)
set(EXFLAGS "-DCPUIDEMU -DVENDOR=99")
endif ()
if (BUILD_KERNEL)
if (DEFINED TARGET_CORE)
# set the C flags for just this file
set(GETARCH2_FLAGS "-DBUILD_KERNEL")
set(TARGET_MAKE "Makefile_kernel.conf")
set(TARGET_CONF "config_kernel.h")
set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR}/kernel_config/${TARGET_CORE})
else()
set(TARGET_MAKE "Makefile.conf")
set(TARGET_CONF "config.h")
set(TARGET_CONF_DIR ${PROJECT_BINARY_DIR})
endif ()
set(TARGET_CONF_TEMP "${PROJECT_BINARY_DIR}/${TARGET_CONF}.tmp")
include("${PROJECT_SOURCE_DIR}/cmake/c_check.cmake")
# c_check
set(FU "")
if (APPLE OR (MSVC AND NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang"))
set(FU "_")
endif()
set(COMPILER_ID ${CMAKE_C_COMPILER_ID})
if (${COMPILER_ID} STREQUAL "GNU")
set(COMPILER_ID "GCC")
endif ()
string(TOUPPER ${ARCH} UC_ARCH)
file(WRITE ${TARGET_CONF_TEMP}
"#define OS_${HOST_OS}\t1\n"
"#define ARCH_${UC_ARCH}\t1\n"
"#define C_${COMPILER_ID}\t1\n"
"#define __${BINARY}BIT__\t1\n"
"#define FUNDERSCORE\t${FU}\n")
if (${HOST_OS} STREQUAL "WINDOWSSTORE")
file(APPEND ${TARGET_CONF_TEMP}
"#define OS_WINNT\t1\n")
endif ()
# f_check
if (NOT NOFORTRAN)
include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake")
endif ()
# Cannot run getarch on target if we are cross-compiling
if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSSTORE"))
# Write to config as getarch would
# compile getarch
set(GETARCH_SRC
${PROJECT_SOURCE_DIR}/getarch.c
${CPUIDEMO}
)
# TODO: Set up defines that getarch sets up based on every other target
# Perhaps this should be inside a different file as it grows larger
file(APPEND ${TARGET_CONF_TEMP}
"#define ${CORE}\n"
"#define CHAR_CORENAME \"${CORE}\"\n")
if ("${CORE}" STREQUAL "ARMV7")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE\t65536\n"
"#define L1_DATA_LINESIZE\t32\n"
"#define L2_SIZE\t512488\n"
"#define L2_LINESIZE\t32\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define L2_ASSOCIATIVE\t4\n"
"#define HAVE_VFPV3\n"
"#define HAVE_VFP\n")
set(SGEMM_UNROLL_M 4)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 4)
set(DGEMM_UNROLL_N 4)
elseif ("${CORE}" STREQUAL "ARMV8")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE\t32768\n"
"#define L1_DATA_LINESIZE\t64\n"
"#define L2_SIZE\t262144\n"
"#define L2_LINESIZE\t64\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define L2_ASSOCIATIVE\t32\n"
"#define ARMV8\n")
set(SGEMM_UNROLL_M 4)
set(SGEMM_UNROLL_N 4)
elseif ("${CORE}" STREQUAL "CORTEXA57" OR "${CORE}" STREQUAL "CORTEXA53")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_CODE_SIZE\t32768\n"
"#define L1_CODE_LINESIZE\t64\n"
"#define L1_CODE_ASSOCIATIVE\t3\n"
"#define L1_DATA_SIZE\t32768\n"
"#define L1_DATA_LINESIZE\t64\n"
"#define L1_DATA_ASSOCIATIVE\t2\n"
"#define L2_SIZE\t262144\n"
"#define L2_LINESIZE\t64\n"
"#define L2_ASSOCIATIVE\t16\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define HAVE_VFPV4\n"
"#define HAVE_VFPV3\n"
"#define HAVE_VFP\n"
"#define HAVE_NEON\n"
"#define ARMV8\n")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 8)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 8)
set(ZGEMM_UNROLL_N 4)
elseif ("${CORE}" STREQUAL "CORTEXA72" OR "${CORE}" STREQUAL "CORTEXA73")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_CODE_SIZE\t49152\n"
"#define L1_CODE_LINESIZE\t64\n"
"#define L1_CODE_ASSOCIATIVE\t3\n"
"#define L1_DATA_SIZE\t32768\n"
"#define L1_DATA_LINESIZE\t64\n"
"#define L1_DATA_ASSOCIATIVE\t2\n"
"#define L2_SIZE\t524288\n"
"#define L2_LINESIZE\t64\n"
"#define L2_ASSOCIATIVE\t16\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define HAVE_VFPV4\n"
"#define HAVE_VFPV3\n"
"#define HAVE_VFP\n"
"#define HAVE_NEON\n"
"#define ARMV8\n")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 8)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 8)
set(ZGEMM_UNROLL_N 4)
elseif ("${CORE}" STREQUAL "FALKOR")
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_CODE_SIZE\t65536\n"
"#define L1_CODE_LINESIZE\t64\n"
"#define L1_CODE_ASSOCIATIVE\t3\n"
"#define L1_DATA_SIZE\t32768\n"
"#define L1_DATA_LINESIZE\t128\n"
"#define L1_DATA_ASSOCIATIVE\t2\n"
"#define L2_SIZE\t524288\n"
"#define L2_LINESIZE\t64\n"
"#define L2_ASSOCIATIVE\t16\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define HAVE_VFPV4\n"
"#define HAVE_VFPV3\n"
"#define HAVE_VFP\n"
"#define HAVE_NEON\n"
"#define ARMV8\n")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 8)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 8)
set(ZGEMM_UNROLL_N 4)
elseif ("${CORE}" STREQUAL "THUNDERX)
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_CODE_SIZE\t32768\n"
"#define L1_CODE_LINESIZE\t64\n"
"#define L1_CODE_ASSOCIATIVE\t3\n"
"#define L1_DATA_SIZE\t32768\n"
"#define L1_DATA_LINESIZE\t128\n"
"#define L1_DATA_ASSOCIATIVE\t2\n"
"#define L2_SIZE\t167772164\n"
"#define L2_LINESIZE\t128\n"
"#define L2_ASSOCIATIVE\t16\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define HAVE_VFPV4\n"
"#define HAVE_VFPV3\n"
"#define HAVE_VFP\n"
"#define HAVE_NEON\n"
"#define ARMV8\n")
set(SGEMM_UNROLL_M 4)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 2)
set(DGEMM_UNROLL_N 2)
set(CGEMM_UNROLL_M 2)
set(CGEMM_UNROLL_N 2)
set(ZGEMM_UNROLL_M 2)
set(ZGEMM_UNROLL_N 2)
elseif ("${CORE}" STREQUAL "THUNDERX2T99)
file(APPEND ${TARGET_CONF_TEMP}
"#define L1_CODE_SIZE\t32768\n"
"#define L1_CODE_LINESIZE\t64\n"
"#define L1_CODE_ASSOCIATIVE\t8\n"
"#define L1_DATA_SIZE\t32768\n"
"#define L1_DATA_LINESIZE\t64\n"
"#define L1_DATA_ASSOCIATIVE\t8\n"
"#define L2_SIZE\t262144\n"
"#define L2_LINESIZE\t64\n"
"#define L2_ASSOCIATIVE\t8\n"
"#define L3_SIZE\t33554432\n"
"#define L3_LINESIZE\t64\n"
"#define L3_ASSOCIATIVE\t32\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n"
"#define VULCAN\n")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 8)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
endif()
if (NOT MSVC)
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
endif ()
# Or should this actually be NUM_CORES?
if (${NUM_THREADS} GREATER 0)
file(APPEND ${TARGET_CONF_TEMP} "#define NUM_CORES\t${NUM_THREADS}\n")
endif()
if (MSVC)
#Use generic for MSVC now
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
endif()
# GetArch_2nd
foreach(float_char S;D;Q;C;Z;X)
if (NOT DEFINED ${float_char}GEMM_UNROLL_M)
set(${float_char}GEMM_UNROLL_M 2)
endif()
if (NOT DEFINED ${float_char}GEMM_UNROLL_N)
set(${float_char}GEMM_UNROLL_N 2)
endif()
endforeach()
file(APPEND ${TARGET_CONF_TEMP}
"#define GEMM_MULTITHREAD_THRESHOLD\t${GEMM_MULTITHREAD_THRESHOLD}\n")
# Move to where gen_config_h would place it
file(MAKE_DIRECTORY ${TARGET_CONF_DIR})
file(RENAME ${TARGET_CONF_TEMP} "${TARGET_CONF_DIR}/${TARGET_CONF}")
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
# disable WindowsStore strict CRT checks
set(GETARCH_FLAGS ${GETARCH_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
endif ()
else(NOT CMAKE_CROSSCOMPILING)
# compile getarch
set(GETARCH_SRC
${PROJECT_SOURCE_DIR}/getarch.c
${CPUIDEMU}
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH_DIR})
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
try_compile(GETARCH_RESULT ${GETARCH_DIR}
SOURCES ${GETARCH_SRC}
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
)
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC")
#Use generic for MSVC now
message("MSVC")
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
else()
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
if (NOT ${GETARCH_RESULT})
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
endif ()
endif ()
message(STATUS "Running getarch")
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
# disable WindowsStore strict CRT checks
set(GETARCH_FLAGS ${GETARCH_FLAGS} -D_CRT_SECURE_NO_WARNINGS)
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
# append config data from getarch to the TARGET file and read in CMake vars
file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT})
ParseGetArchVars(${GETARCH_MAKE_OUT})
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH2_DIR})
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GETARCH2_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
)
if (NOT ${GETARCH2_RESULT})
MESSAGE(FATAL_ERROR "Compiling getarch_2nd failed ${GETARCH2_LOG}")
endif ()
endif ()
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH_DIR})
configure_file(${TARGET_CONF_TEMP} ${GETARCH_DIR}/${TARGET_CONF} COPYONLY)
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
try_compile(GETARCH_RESULT ${GETARCH_DIR}
SOURCES ${GETARCH_SRC}
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${GETARCH_DIR} -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
OUTPUT_VARIABLE GETARCH_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
)
if (NOT ${GETARCH_RESULT})
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
endif ()
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)
# append config data from getarch_2nd to the TARGET file and read in CMake vars
file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT})
ParseGetArchVars(${GETARCH2_MAKE_OUT})
# compile get_config_h
set(GEN_CONFIG_H_DIR "${PROJECT_BINARY_DIR}/genconfig_h_build")
set(GEN_CONFIG_H_BIN "gen_config_h${CMAKE_EXECUTABLE_SUFFIX}")
set(GEN_CONFIG_H_FLAGS "-DVERSION=\"${OpenBLAS_VERSION}\"")
file(MAKE_DIRECTORY ${GEN_CONFIG_H_DIR})
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
try_compile(GEN_CONFIG_H_RESULT ${GEN_CONFIG_H_DIR}
SOURCES ${PROJECT_SOURCE_DIR}/gen_config_h.c
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GEN_CONFIG_H_FLAGS} -I${PROJECT_SOURCE_DIR}
OUTPUT_VARIABLE GEN_CONFIG_H_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GEN_CONFIG_H_BIN}
)
if (NOT ${GEN_CONFIG_H_RESULT})
MESSAGE(FATAL_ERROR "Compiling gen_config_h failed ${GEN_CONFIG_H_LOG}")
endif ()
message(STATUS "Running getarch")
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH_BIN}" 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")
# append config data from getarch to the TARGET file and read in CMake vars
file(APPEND ${TARGET_CONF_TEMP} ${GETARCH_CONF_OUT})
ParseGetArchVars(${GETARCH_MAKE_OUT})
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH2_DIR})
configure_file(${TARGET_CONF_TEMP} ${GETARCH2_DIR}/${TARGET_CONF} COPYONLY)
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${GETARCH2_DIR} -I"${PROJECT_SOURCE_DIR}" -I"${PROJECT_BINARY_DIR}"
OUTPUT_VARIABLE GETARCH2_LOG
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
)
if (NOT ${GETARCH2_RESULT})
MESSAGE(FATAL_ERROR "Compiling getarch_2nd failed ${GETARCH2_LOG}")
endif ()
endif ()
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
execute_process(COMMAND "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}" 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)
# append config data from getarch_2nd to the TARGET file and read in CMake vars
file(APPEND ${TARGET_CONF_TEMP} ${GETARCH2_CONF_OUT})
configure_file(${TARGET_CONF_TEMP} ${TARGET_CONF_DIR}/${TARGET_CONF} COPYONLY)
ParseGetArchVars(${GETARCH2_MAKE_OUT})
endif()
endif ()

View File

@@ -2,27 +2,13 @@
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from OpenBLAS/Makefile.system
##
set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib")
# System detection, via CMake.
include("${PROJECT_SOURCE_DIR}/cmake/system_check.cmake")
# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
# http://stackoverflow.com/questions/714100/os-detecting-makefile
if(CMAKE_CROSSCOMPILING AND NOT DEFINED TARGET)
# Detect target without running getarch
if (ARM64)
set(TARGET "ARMV8")
elseif(ARM)
set(TARGET "ARMV7") # TODO: Ask compiler which arch this is
else()
message(FATAL_ERROR "When cross compiling, a TARGET is required.")
endif()
endif()
# Other files expect CORE, which is actually TARGET and will become TARGET_CORE for kernel build. Confused yet?
# It seems we are meant to use TARGET as input and CORE internally as kernel.
if(NOT DEFINED CORE AND DEFINED TARGET)
set(CORE ${TARGET})
endif()
# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa
# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
if (DEFINED TARGET_CORE)
@@ -33,7 +19,7 @@ endif ()
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
message(STATUS "Compiling a ${BINARY}-bit binary.")
set(NO_AVX 1)
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX")
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE")
set(TARGET "NEHALEM")
endif ()
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
@@ -42,23 +28,7 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
endif ()
if (DEFINED TARGET)
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
endif()
endif()
if (DEFINED TARGET)
message(STATUS "Targeting the ${TARGET} architecture.")
message(STATUS "Targetting the ${TARGET} architecture.")
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
endif ()
@@ -83,16 +53,21 @@ if (NO_AVX2)
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2")
endif ()
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
set(GETARCH_FLAGS "${GETARCH_FLAGS} ${CMAKE_C_FLAGS_DEBUG}")
if (CMAKE_BUILD_TYPE STREQUAL Debug)
set(GETARCH_FLAGS "${GETARCH_FLAGS} -g")
endif ()
# TODO: let CMake handle this? -hpa
#if (${QUIET_MAKE})
# set(MAKE "${MAKE} -s")
#endif()
if (NOT DEFINED NO_PARALLEL_MAKE)
set(NO_PARALLEL_MAKE 0)
endif ()
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}")
if (CMAKE_C_COMPILER STREQUAL loongcc)
if (CMAKE_CXX_COMPILER STREQUAL loongcc)
set(GETARCH_FLAGS "${GETARCH_FLAGS} -static")
endif ()
@@ -103,44 +78,51 @@ else ()
set(ONLY_CBLAS 0)
endif ()
# N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
if (NOT CMAKE_CROSSCOMPILING)
if (NOT DEFINED NUM_CORES)
include(ProcessorCount)
ProcessorCount(NUM_CORES)
endif()
endif()
if (NOT DEFINED NUM_PARALLEL)
set(NUM_PARALLEL 1)
endif()
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
if (NOT DEFINED NUM_THREADS)
if (DEFINED NUM_CORES AND NOT NUM_CORES EQUAL 0)
# HT?
set(NUM_THREADS ${NUM_CORES})
else ()
set(NUM_THREADS 0)
endif ()
endif()
set(NUM_THREADS ${NUM_CORES})
endif ()
if (${NUM_THREADS} LESS 2)
if (${NUM_THREADS} EQUAL 1)
set(USE_THREAD 0)
elseif(NOT DEFINED USE_THREAD)
set(USE_THREAD 1)
endif ()
if (USE_THREAD)
message(STATUS "Multi-threading enabled with ${NUM_THREADS} threads.")
if (DEFINED USE_THREAD)
if (NOT ${USE_THREAD})
unset(SMP)
else ()
set(SMP 1)
endif ()
else ()
# N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
if (${NUM_THREADS} EQUAL 1)
unset(SMP)
else ()
set(SMP 1)
endif ()
endif ()
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
if (${SMP})
message(STATUS "SMP enabled.")
endif ()
if (NOT DEFINED NEED_PIC)
set(NEED_PIC 1)
endif ()
# TODO: I think CMake should be handling all this stuff -hpa
unset(ARFLAGS)
set(CPP "${COMPILER} -E")
set(AR "${CROSS_SUFFIX}ar")
set(AS "${CROSS_SUFFIX}as")
set(LD "${CROSS_SUFFIX}ld")
set(RANLIB "${CROSS_SUFFIX}ranlib")
set(NM "${CROSS_SUFFIX}nm")
set(DLLWRAP "${CROSS_SUFFIX}dllwrap")
set(OBJCOPY "${CROSS_SUFFIX}objcopy")
set(OBJCONV "${CROSS_SUFFIX}objconv")
# OS dependent settings
include("${PROJECT_SOURCE_DIR}/cmake/os.cmake")
@@ -168,20 +150,15 @@ if (NEED_PIC)
set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC")
endif ()
if (NOT NOFORTRAN)
if (${F_COMPILER} STREQUAL "SUN")
set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
else ()
set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
endif ()
endif()
if (${F_COMPILER} STREQUAL "SUN")
set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
else ()
set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
endif ()
endif ()
if (DYNAMIC_ARCH)
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
if (DYNAMIC_OLDER)
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_OLDER")
endif ()
endif ()
if (NO_LAPACK)
@@ -198,7 +175,7 @@ if (NO_AVX)
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
endif ()
if (X86)
if (${ARCH} STREQUAL "x86")
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
endif ()
@@ -206,20 +183,25 @@ if (NO_AVX2)
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
endif ()
if (USE_THREAD)
# USE_SIMPLE_THREADED_LEVEL3 = 1
# NO_AFFINITY = 1
if (SMP)
set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER")
if (MIPS64)
if (${ARCH} STREQUAL "mips64")
if (NOT ${CORE} STREQUAL "LOONGSON3B")
set(USE_SIMPLE_THREADED_LEVEL3 1)
endif ()
endif ()
if (USE_OPENMP)
# USE_SIMPLE_THREADED_LEVEL3 = 1
# NO_AFFINITY = 1
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_OPENMP")
endif ()
if (BIGNUMA)
set(CCOMMON_OPT "${CCOMMON_OPT} -DBIGNUMA")
endif ()
endif ()
if (NO_WARMUP)
@@ -230,10 +212,6 @@ if (CONSISTENT_FPCSR)
set(CCOMMON_OPT "${CCOMMON_OPT} -DCONSISTENT_FPCSR")
endif ()
if (USE_TLS)
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_TLS")
endif ()
# Only for development
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST")
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST")
@@ -251,8 +229,6 @@ endif ()
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_PARALLEL_NUMBER=${NUM_PARALLEL}")
if (USE_SIMPLE_THREADED_LEVEL3)
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
endif ()
@@ -288,7 +264,7 @@ if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
set(NO_AFFINITY 1)
endif ()
if (NOT X86_64 AND NOT X86 AND NOT ${CORE} STREQUAL "LOONGSON3B")
if (NOT ${ARCH} STREQUAL "x86_64" AND NOT ${ARCH} STREQUAL "x86" AND NOT ${CORE} STREQUAL "LOONGSON3B")
set(NO_AFFINITY 1)
endif ()
@@ -320,21 +296,52 @@ if (MIXED_MEMORY_ALLOCATION)
set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
endif ()
set(CCOMMON_OPT "${CCOMMON_OPT} -DVERSION=\"\\\"${OpenBLAS_VERSION}\\\"\"")
if (${CMAKE_SYSTEM_NAME} STREQUAL "SunOS")
set(TAR gtar)
set(PATCH gpatch)
set(GREP ggrep)
else ()
set(TAR tar)
set(PATCH patch)
set(GREP grep)
endif ()
if (NOT DEFINED MD5SUM)
set(MD5SUM md5sum)
endif ()
set(AWK awk)
set(SED sed)
set(REVISION "-r${OpenBLAS_VERSION}")
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CCOMMON_OPT}")
if (DEBUG)
set(COMMON_OPT "${COMMON_OPT} -g")
endif ()
if (NOT DEFINED COMMON_OPT)
set(COMMON_OPT "-O2")
endif ()
#For x86 32-bit
if (DEFINED BINARY AND BINARY EQUAL 32)
if (NOT MSVC)
set(COMMON_OPT "${COMMON_OPT} -m32")
endif()
endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
if(NOT MSVC)
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${CCOMMON_OPT}")
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
endif()
# TODO: not sure what PFLAGS is -hpa
set(PFLAGS "${PFLAGS} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")
set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${FCOMMON_OPT}")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}")
# TODO: not sure what FPFLAGS is -hpa
set(FPFLAGS "${FPFLAGS} ${FCOMMON_OPT} ${COMMON_PROF}")
set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}")
#For LAPACK Fortran codes.
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}")
@@ -342,7 +349,7 @@ set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}")
#Disable -fopenmp for LAPACK Fortran codes on Windows.
if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel")
set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parralel")
foreach (FILTER_FLAG ${FILTER_FLAGS})
string(REPLACE ${FILTER_FLAG} "" LAPACK_FFLAGS ${LAPACK_FFLAGS})
string(REPLACE ${FILTER_FLAG} "" LAPACK_FPFLAGS ${LAPACK_FPFLAGS})
@@ -380,7 +387,7 @@ if (NOT DEFINED LIBSUFFIX)
endif ()
if (DYNAMIC_ARCH)
if (USE_THREAD)
if (DEFINED SMP)
set(LIBNAME "${LIBPREFIX}p${REVISION}.${LIBSUFFIX}")
set(LIBNAME_P "${LIBPREFIX}p${REVISION}_p.${LIBSUFFIX}")
else ()
@@ -388,7 +395,7 @@ if (DYNAMIC_ARCH)
set(LIBNAME_P "${LIBPREFIX}${REVISION}_p.${LIBSUFFIX}")
endif ()
else ()
if (USE_THREAD)
if (DEFINED SMP)
set(LIBNAME "${LIBPREFIX}_${LIBCORE}p${REVISION}.${LIBSUFFIX}")
set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}p${REVISION}_p.${LIBSUFFIX}")
else ()
@@ -419,9 +426,6 @@ if (NOT NO_LAPACK)
if (NOT NO_LAPACKE)
set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE")
endif ()
if (BUILD_RELAPACK)
set(LIB_COMPONENTS "${LIB_COMPONENTS} ReLAPACK")
endif ()
endif ()
if (ONLY_CBLAS)
@@ -433,7 +437,7 @@ endif ()
set(USE_GEMM3M 0)
if (DEFINED ARCH)
if (X86 OR X86_64 OR ${ARCH} STREQUAL "ia64" OR MIPS64)
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS")
set(USE_GEMM3M 1)
endif ()
@@ -516,3 +520,35 @@ endif ()
# export CUFLAGS
# export CULIB
#endif
#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f
#
#.f.$(SUFFIX):
# $(FC) $(FFLAGS) -c $< -o $(@F)
#
#.f.$(PSUFFIX):
# $(FC) $(FPFLAGS) -pg -c $< -o $(@F)
# these are not cross-platform
#ifdef BINARY64
#PATHSCALEPATH = /opt/pathscale/lib/3.1
#PGIPATH = /opt/pgi/linux86-64/7.1-5/lib
#else
#PATHSCALEPATH = /opt/pathscale/lib/3.1/32
#PGIPATH = /opt/pgi/linux86/7.1-5/lib
#endif
#ACMLPATH = /opt/acml/4.3.0
#ifneq ($(OSNAME), Darwin)
#MKLPATH = /opt/intel/mkl/10.2.2.025/lib
#else
#MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib
#endif
#ATLASPATH = /opt/atlas/3.9.17/opteron
#FLAMEPATH = $(HOME)/flame/lib
#ifneq ($(OSNAME), SunOS)
#SUNPATH = /opt/sunstudio12.1
#else
#SUNPATH = /opt/SUNWspro
#endif

View File

@@ -1,87 +0,0 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from the OpenBLAS/c_check perl script.
## This is triggered by prebuild.cmake and runs before any of the code is built.
## Creates config.h and Makefile.conf.
# Convert CMake vars into the format that OpenBLAS expects
string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS)
if (${HOST_OS} STREQUAL "WINDOWS")
set(HOST_OS WINNT)
endif ()
if (${HOST_OS} STREQUAL "LINUX")
# check if we're building natively on Android (TERMUX)
EXECUTE_PROCESS( COMMAND uname -o COMMAND tr -d '\n' OUTPUT_VARIABLE OPERATING_SYSTEM)
if(${OPERATING_SYSTEM} MATCHES "Android")
set(HOST_OS ANDROID)
endif(${OPERATING_SYSTEM} MATCHES "Android")
endif()
if(CMAKE_COMPILER_IS_GNUCC AND WIN32)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpmachine
OUTPUT_VARIABLE OPENBLAS_GCC_TARGET_MACHINE
OUTPUT_STRIP_TRAILING_WHITESPACE)
if(OPENBLAS_GCC_TARGET_MACHINE MATCHES "amd64|x86_64|AMD64")
set(MINGW64 1)
endif()
endif()
# Pretty thorough determination of arch. Add more if needed
if(CMAKE_CL_64 OR MINGW64)
set(X86_64 1)
elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
set(X86 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc.*|power.*|Power.*")
set(PPC 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "mips64.*")
set(MIPS64 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
set(X86_64 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
set(X86 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
set(ARM 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*)")
set(ARM64 1)
endif()
if (X86_64)
set(ARCH "x86_64")
elseif(X86)
set(ARCH "x86")
elseif(PPC)
set(ARCH "power")
elseif(ARM)
set(ARCH "arm")
elseif(ARM64)
set(ARCH "arm64")
else()
set(ARCH ${CMAKE_SYSTEM_PROCESSOR} CACHE STRING "Target Architecture")
endif ()
if (NOT BINARY)
if (X86_64 OR ARM64 OR PPC OR MIPS64)
set(BINARY 64)
else ()
set(BINARY 32)
endif ()
endif()
if(BINARY EQUAL 64)
set(BINARY64 1)
else()
set(BINARY32 1)
endif()
if (X86_64 OR X86)
file(WRITE ${PROJECT_BINARY_DIR}/avx512.tmp "#include <immintrin.h>\n\nint main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }")
execute_process(COMMAND ${CMAKE_C_COMPILER} -march=skylake-avx512 -v -o ${PROJECT_BINARY_DIR}/avx512.o -x c ${PROJECT_BINARY_DIR}/avx512.tmp OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_AVX512)
if (NO_AVX512 EQUAL 1)
set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512")
endif()
file(REMOVE "avx512.tmp" "avx512.o")
endif()

View File

@@ -202,8 +202,6 @@ function(GenerateNamedObjects sources_in)
if (use_cblas)
set(obj_name "cblas_${obj_name}")
list(APPEND obj_defines "CBLAS")
elseif (NOT "${obj_name}" MATCHES "${ARCH_SUFFIX}")
set(obj_name "${obj_name}${ARCH_SUFFIX}")
endif ()
list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")
@@ -236,9 +234,7 @@ function(GenerateNamedObjects sources_in)
string(REPLACE ";" "\n#define " define_source "${obj_defines}")
string(REPLACE "=" " " define_source "${define_source}")
file(WRITE ${new_source_file}.tmp "#define ${define_source}\n#include \"${old_source_file}\"")
configure_file(${new_source_file}.tmp ${new_source_file} COPYONLY)
file(REMOVE ${new_source_file}.tmp)
file(WRITE ${new_source_file} "#define ${define_source}\n#include \"${old_source_file}\"")
list(APPEND SRC_LIST_OUT ${new_source_file})
endforeach ()

View File

@@ -93,7 +93,7 @@ extern "C" {
#include <sched.h>
#endif
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(OS_ANDROID)
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_ANDROID)
#include <sched.h>
#endif
@@ -105,10 +105,6 @@ extern "C" {
#endif
#endif
#ifdef OS_HAIKU
#define NO_SYSV_IPC
#endif
#ifdef OS_WINDOWS
#ifdef ATOM
#define GOTO_ATOM ATOM
@@ -183,7 +179,7 @@ extern "C" {
#define ALLOCA_ALIGN 63UL
#define NUM_BUFFERS MAX(50,(MAX_CPU_NUMBER * 2 * MAX_PARALLEL_NUMBER))
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
#ifdef NEEDBUNDERSCORE
#define BLASFUNC(FUNC) FUNC##_
@@ -257,14 +253,8 @@ typedef unsigned long BLASULONG;
#ifdef USE64BITINT
typedef BLASLONG blasint;
#if defined(OS_WINDOWS) && defined(__64BIT__)
#define blasabs(x) llabs(x)
#else
#define blasabs(x) labs(x)
#endif
#else
typedef int blasint;
#define blasabs(x) abs(x)
#endif
#else
#ifdef USE64BITINT
@@ -505,33 +495,6 @@ static void __inline blas_lock(volatile BLASULONG *address){
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
#endif
#ifndef ASSEMBLER
/* C99 supports complex floating numbers natively, which GCC also offers as an
extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER)
#define OPENBLAS_COMPLEX_C99
#ifndef __cplusplus
#include <complex.h>
#endif
typedef float _Complex openblas_complex_float;
typedef double _Complex openblas_complex_double;
typedef xdouble _Complex openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
#else
#define OPENBLAS_COMPLEX_STRUCT
typedef struct { float real, imag; } openblas_complex_float;
typedef struct { double real, imag; } openblas_complex_double;
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) {(real), (imag)}
#define openblas_make_complex_double(real, imag) {(real), (imag)}
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
#endif
#endif
#include "param.h"
#include "common_param.h"
@@ -561,6 +524,31 @@ static void __inline blas_lock(volatile BLASULONG *address){
#include <stdio.h>
#endif // NOINCLUDE
/* C99 supports complex floating numbers natively, which GCC also offers as an
extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT)))
#define OPENBLAS_COMPLEX_C99
#ifndef __cplusplus
#include <complex.h>
#endif
typedef float _Complex openblas_complex_float;
typedef double _Complex openblas_complex_double;
typedef xdouble _Complex openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
#else
#define OPENBLAS_COMPLEX_STRUCT
typedef struct { float real, imag; } openblas_complex_float;
typedef struct { double real, imag; } openblas_complex_double;
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) {(real), (imag)}
#define openblas_make_complex_double(real, imag) {(real), (imag)}
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
#endif
#ifdef XDOUBLE
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)
@@ -652,7 +640,6 @@ void gotoblas_profile_init(void);
void gotoblas_profile_quit(void);
#ifdef USE_OPENMP
#ifndef C_MSVC
int omp_in_parallel(void);
int omp_get_num_procs(void);
@@ -660,21 +647,6 @@ int omp_get_num_procs(void);
__declspec(dllimport) int __cdecl omp_in_parallel(void);
__declspec(dllimport) int __cdecl omp_get_num_procs(void);
#endif
#if (__STDC_VERSION__ >= 201112L)
#if defined(C_GCC) && ( __GNUC__ < 7)
// workaround for GCC bug 65467
#ifndef _Atomic
#define _Atomic volatile
#endif
#endif
#include <stdatomic.h>
#else
#ifndef _Atomic
#define _Atomic volatile
#endif
#endif
#else
#ifdef __ELF__
int omp_in_parallel (void) __attribute__ ((weak));

View File

@@ -47,14 +47,6 @@ __global__ void cuda_dgemm_kernel(int, int, int, double *, double *, double *);
extern "C" {
#endif
extern void sgemm_kernel_direct(BLASLONG M, BLASLONG N, BLASLONG K,
float * A, BLASLONG strideA,
float * B, BLASLONG strideB,
float * R, BLASLONG strideR);
extern int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int dgemm_beta(BLASLONG, BLASLONG, BLASLONG, double,

View File

@@ -94,7 +94,7 @@ static inline unsigned int rpcc(void){
#define RPCC_DEFINED
#ifndef NO_AFFINITY
//#define WHEREAMI
#define WHEREAMI
static inline int WhereAmI(void){
int ret=0;
__asm__ __volatile__(".set push \n"

View File

@@ -333,8 +333,8 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
float (*cnrm2_k) (BLASLONG, float *, BLASLONG);
float (*casum_k) (BLASLONG, float *, BLASLONG);
int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
float _Complex (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
float _Complex (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
@@ -496,8 +496,8 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
double (*znrm2_k) (BLASLONG, double *, BLASLONG);
double (*zasum_k) (BLASLONG, double *, BLASLONG);
int (*zcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
openblas_complex_double (*zdotu_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
openblas_complex_double (*zdotc_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
double _Complex (*zdotu_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
double _Complex (*zdotc_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
int (*zdrot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
int (*zaxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
@@ -661,8 +661,8 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
xdouble (*xnrm2_k) (BLASLONG, xdouble *, BLASLONG);
xdouble (*xasum_k) (BLASLONG, xdouble *, BLASLONG);
int (*xcopy_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
openblas_complex_xdouble (*xdotu_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
openblas_complex_xdouble (*xdotc_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
xdouble _Complex (*xdotu_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
xdouble _Complex (*xdotc_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int (*xqrot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);
int (*xaxpy_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
@@ -888,7 +888,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
int (*zgeadd_k) (BLASLONG, BLASLONG, float, double, double *, BLASLONG, double, double, double *, BLASLONG);
} gotoblas_t;

View File

@@ -47,15 +47,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* - large enough to support all architectures and kernel
* Chosing a too small SIZE will lead to a stack smashing.
*/
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
/* make it volatile because some function (ex: dgemv_n.S) */ \
/* do not restore all register */ \
volatile int stack_alloc_size = SIZE; \
if (stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE)) stack_alloc_size = 0; \
STACK_ALLOC_PROTECT_SET \
/* Avoid declaring an array of length 0 */ \
TYPE stack_buffer[stack_alloc_size ? stack_alloc_size : 1] \
__attribute__((aligned(0x20))); \
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
/* make it volatile because some function (ex: dgemv_n.S) */ \
/* do not restore all register */ \
volatile int stack_alloc_size = SIZE; \
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE)) \
stack_alloc_size = 0; \
STACK_ALLOC_PROTECT_SET \
TYPE stack_buffer[stack_alloc_size] __attribute__((aligned(0x20))); \
BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);
#else
//Original OpenBLAS/GotoBLAS codes.

View File

@@ -178,13 +178,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
result = x/y;
return result;
#else
#if (MAX_CPU_NUMBER > 64)
if ( y > 64) {
result = x/y;
return result;
}
#endif
y = blas_quick_divide_table[y];
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
@@ -333,7 +327,7 @@ REALNAME:
#endif
#endif
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(__ELF__)
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__)
#define PROLOGUE \
.text; \
.align 16; \

View File

@@ -60,13 +60,8 @@
#endif
*/
#ifdef __GNUC__
#define MB do { __asm__ __volatile__("": : :"memory"); } while (0)
#define WMB do { __asm__ __volatile__("": : :"memory"); } while (0)
#else
#define MB do {} while (0)
#define WMB do {} while (0)
#endif
#define MB
#define WMB
static void __inline blas_lock(volatile BLASULONG *address){
@@ -201,13 +196,6 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
if (y <= 1) return x;
#if (MAX_CPU_NUMBER > 64)
if (y > 64) {
result = x / y;
return result;
}
#endif
y = blas_quick_divide_table[y];
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
@@ -415,7 +403,7 @@ REALNAME:
#define EPILOGUE .end
#endif
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_OPENBSD) || defined(OS_DRAGONFLY) || defined(__ELF__) || defined(C_PGI)
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI)
#define PROLOGUE \
.text; \
.align 512; \

View File

@@ -115,7 +115,6 @@
#define CORE_STEAMROLLER 25
#define CORE_EXCAVATOR 26
#define CORE_ZEN 27
#define CORE_SKYLAKEX 28
#define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1)
@@ -138,7 +137,6 @@
#define HAVE_AVX (1 << 18)
#define HAVE_FMA4 (1 << 19)
#define HAVE_FMA3 (1 << 20)
#define HAVE_AVX512VL (1 << 21)
#define CACHE_INFO_L1_I 1
#define CACHE_INFO_L1_D 2
@@ -213,6 +211,5 @@ typedef struct {
#define CPUTYPE_STEAMROLLER 49
#define CPUTYPE_EXCAVATOR 50
#define CPUTYPE_ZEN 51
#define CPUTYPE_SKYLAKEX 52
#endif

View File

@@ -34,7 +34,7 @@
#define CPU_CORTEXA15 4
static char *cpuname[] = {
"UNKNOWN",
"UNKOWN",
"ARMV6",
"ARMV7",
"CORTEXA9",

View File

@@ -29,37 +29,25 @@
#define CPU_UNKNOWN 0
#define CPU_ARMV8 1
// Arm
#define CPU_CORTEXA53 2
#define CPU_CORTEXA57 3
#define CPU_CORTEXA72 4
#define CPU_CORTEXA73 5
// Qualcomm
#define CPU_FALKOR 6
// Cavium
#define CPU_THUNDERX 7
#define CPU_THUNDERX2T99 8
#define CPU_CORTEXA57 2
#define CPU_VULCAN 3
#define CPU_THUNDERX 4
#define CPU_THUNDERX2T99 5
static char *cpuname[] = {
"UNKNOWN",
"ARMV8" ,
"CORTEXA53",
"CORTEXA57",
"CORTEXA72",
"CORTEXA73",
"FALKOR",
"VULCAN",
"THUNDERX",
"THUNDERX2T99"
};
static char *cpuname_lower[] = {
"unknown",
"armv8",
"cortexa53",
"armv8" ,
"cortexa57",
"cortexa72",
"cortexa73",
"falkor",
"vulcan",
"thunderx",
"thunderx2t99"
};
@@ -126,24 +114,13 @@ int detect(void)
fclose(infile);
if(cpu_part != NULL && cpu_implementer != NULL) {
// Arm
if (strstr(cpu_implementer, "0x41")) {
if (strstr(cpu_part, "0xd03"))
return CPU_CORTEXA53;
else if (strstr(cpu_part, "0xd07"))
return CPU_CORTEXA57;
else if (strstr(cpu_part, "0xd08"))
return CPU_CORTEXA72;
else if (strstr(cpu_part, "0xd09"))
return CPU_CORTEXA73;
}
// Qualcomm
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
return CPU_FALKOR;
// Cavium
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0a1"))
if (strstr(cpu_part, "0xd07") && strstr(cpu_implementer, "0x41"))
return CPU_CORTEXA57;
else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42"))
return CPU_VULCAN;
else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43"))
return CPU_THUNDERX;
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af"))
else if (strstr(cpu_part, "0xFFF") && strstr(cpu_implementer, "0x43")) /* TODO */
return CPU_THUNDERX2T99;
}
@@ -165,7 +142,7 @@ int detect(void)
if(p != NULL)
{
if ((strstr(p, "AArch64")) || (strstr(p, "8")))
if (strstr(p, "AArch64"))
{
return CPU_ARMV8;
@@ -202,63 +179,64 @@ void get_subdirname(void)
void get_cpuconfig(void)
{
// All arches should define ARMv8
printf("#define ARMV8\n");
printf("#define HAVE_NEON\n"); // This shouldn't be necessary
printf("#define HAVE_VFPV4\n"); // This shouldn't be necessary
int d = detect();
switch (d)
{
case CPU_CORTEXA53:
printf("#define %s\n", cpuname[d]);
// Fall-through
case CPU_ARMV8:
// Minimum parameters for ARMv8 (based on A53)
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L2_SIZE 262144\n");
printf("#define L2_LINESIZE 64\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n");
printf("#define ARMV8\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L2_SIZE 262144\n");
printf("#define L2_LINESIZE 64\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n");
break;
case CPU_VULCAN:
printf("#define VULCAN \n");
printf("#define HAVE_VFP \n");
printf("#define HAVE_VFPV3 \n");
printf("#define HAVE_NEON \n");
printf("#define HAVE_VFPV4 \n");
printf("#define L1_CODE_SIZE 32768 \n");
printf("#define L1_CODE_LINESIZE 64 \n");
printf("#define L1_CODE_ASSOCIATIVE 8 \n");
printf("#define L1_DATA_SIZE 32768 \n");
printf("#define L1_DATA_LINESIZE 64 \n");
printf("#define L1_DATA_ASSOCIATIVE 8 \n");
printf("#define L2_SIZE 262144 \n");
printf("#define L2_LINESIZE 64 \n");
printf("#define L2_ASSOCIATIVE 8 \n");
printf("#define L3_SIZE 33554432 \n");
printf("#define L3_LINESIZE 64 \n");
printf("#define L3_ASSOCIATIVE 32 \n");
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n");
break;
case CPU_CORTEXA57:
case CPU_CORTEXA72:
case CPU_CORTEXA73:
// Common minimum settings for these Arm cores
// Can change a lot, but we need to be conservative
// TODO: detect info from /sys if possible
printf("#define %s\n", cpuname[d]);
printf("#define CORTEXA57\n");
printf("#define HAVE_VFP\n");
printf("#define HAVE_VFPV3\n");
printf("#define HAVE_NEON\n");
printf("#define HAVE_VFPV4\n");
printf("#define L1_CODE_SIZE 49152\n");
printf("#define L1_CODE_LINESIZE 64\n");
printf("#define L1_CODE_ASSOCIATIVE 3\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L1_DATA_ASSOCIATIVE 2\n");
printf("#define L2_SIZE 524288\n");
printf("#define L2_SIZE 2097152\n");
printf("#define L2_LINESIZE 64\n");
printf("#define L2_ASSOCIATIVE 16\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
break;
case CPU_FALKOR:
printf("#define FALKOR\n");
printf("#define L1_CODE_SIZE 65536\n");
printf("#define L1_CODE_LINESIZE 64\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 128\n");
printf("#define L2_SIZE 524288\n");
printf("#define L2_LINESIZE 64\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 16\n");
break;
case CPU_THUNDERX:
printf("#define ARMV8\n");
printf("#define THUNDERX\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 128\n");
@@ -270,7 +248,11 @@ void get_cpuconfig(void)
break;
case CPU_THUNDERX2T99:
printf("#define THUNDERX2T99 \n");
printf("#define VULCAN \n");
printf("#define HAVE_VFP \n");
printf("#define HAVE_VFPV3 \n");
printf("#define HAVE_NEON \n");
printf("#define HAVE_VFPV4 \n");
printf("#define L1_CODE_SIZE 32768 \n");
printf("#define L1_CODE_LINESIZE 64 \n");
printf("#define L1_CODE_ASSOCIATIVE 8 \n");

View File

@@ -72,12 +72,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CPU_UNKNOWN 0
#define CPU_P5600 1
#define CPU_1004K 2
static char *cpuname[] = {
"UNKNOWN",
"P5600",
"1004K"
"UNKOWN",
"P5600"
};
int detect(void){
@@ -92,7 +90,7 @@ int detect(void){
if (!strncmp("cpu", buffer, 3)){
p = strchr(buffer, ':') + 2;
#if 0
fprintf(stderr, "%s \n", p);
fprintf(stderr, "%s\n", p);
#endif
break;
}
@@ -101,13 +99,43 @@ int detect(void){
fclose(infile);
if(p != NULL){
if (strstr(p, "5600")) {
return CPU_P5600;
} else if (strstr(p, "1004K")) {
return CPU_1004K;
} else
if (strstr(p, "Loongson-3A")){
return CPU_LOONGSON3A;
}else if(strstr(p, "Loongson-3B")){
return CPU_LOONGSON3B;
}else if (strstr(p, "Loongson-3")){
infile = fopen("/proc/cpuinfo", "r");
p = (char *)NULL;
while (fgets(buffer, sizeof(buffer), infile)){
if (!strncmp("system type", buffer, 11)){
p = strchr(buffer, ':') + 2;
break;
}
}
fclose(infile);
if (strstr(p, "loongson3a"))
return CPU_LOONGSON3A;
}else{
return CPU_UNKNOWN;
}
}
//Check model name for Loongson3
infile = fopen("/proc/cpuinfo", "r");
p = (char *)NULL;
while (fgets(buffer, sizeof(buffer), infile)){
if (!strncmp("model name", buffer, 10)){
p = strchr(buffer, ':') + 2;
break;
}
}
fclose(infile);
if(p != NULL){
if (strstr(p, "Loongson-3A")){
return CPU_LOONGSON3A;
}else if(strstr(p, "Loongson-3B")){
return CPU_LOONGSON3B;
}
}
#endif
return CPU_UNKNOWN;
}
@@ -121,7 +149,7 @@ void get_architecture(void){
}
void get_subarchitecture(void){
if(detect()==CPU_P5600|| detect()==CPU_1004K){
if(detect()==CPU_P5600){
printf("P5600");
}else{
printf("UNKNOWN");
@@ -142,14 +170,6 @@ void get_cpuconfig(void){
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 8\n");
} else if (detect()==CPU_1004K) {
printf("#define MIPS1004K\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 26144\n");
printf("#define DTB_DEFAULT_ENTRIES 8\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n");
}else{
printf("#define UNKNOWN\n");
}
@@ -158,8 +178,6 @@ void get_cpuconfig(void){
void get_libname(void){
if(detect()==CPU_P5600) {
printf("p5600\n");
} else if (detect()==CPU_1004K) {
printf("1004K\n");
}else{
printf("mips\n");
}

View File

@@ -76,16 +76,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CPU_LOONGSON3B 3
#define CPU_I6400 4
#define CPU_P6600 5
#define CPU_I6500 6
static char *cpuname[] = {
"UNKNOWN",
"UNKOWN",
"SICORTEX",
"LOONGSON3A",
"LOONGSON3B",
"I6400",
"P6600",
"I6500"
"P6600"
};
int detect(void){
@@ -167,8 +165,6 @@ void get_subarchitecture(void){
printf("I6400");
}else if(detect()==CPU_P6600){
printf("P6600");
}else if(detect()==CPU_I6500){
printf("I6500");
}else{
printf("SICORTEX");
}
@@ -215,15 +211,6 @@ void get_cpuconfig(void){
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 8\n");
}else if(detect()==CPU_I6500){
printf("#define I6500\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 1048576\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 8\n");
}else{
printf("#define SICORTEX\n");
printf("#define L1_DATA_SIZE 32768\n");
@@ -245,8 +232,6 @@ void get_libname(void){
printf("i6400\n");
}else if(detect()==CPU_P6600) {
printf("p6600\n");
}else if(detect()==CPU_I6500) {
printf("i6500\n");
}else{
printf("mips64\n");
}

View File

@@ -56,7 +56,6 @@
#define CPUTYPE_CELL 6
#define CPUTYPE_PPCG4 7
#define CPUTYPE_POWER8 8
#define CPUTYPE_POWER9 9
char *cpuname[] = {
"UNKNOWN",
@@ -67,8 +66,7 @@ char *cpuname[] = {
"POWER6",
"CELL",
"PPCG4",
"POWER8",
"POWER9"
"POWER8"
};
char *lowercpuname[] = {
@@ -80,8 +78,7 @@ char *lowercpuname[] = {
"power6",
"cell",
"ppcg4",
"power8",
"power9"
"power8"
};
char *corename[] = {
@@ -93,8 +90,7 @@ char *corename[] = {
"POWER6",
"CELL",
"PPCG4",
"POWER8",
"POWER8"
"POWER8"
};
int detect(void){
@@ -124,7 +120,6 @@ int detect(void){
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER8;
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
@@ -132,33 +127,6 @@ int detect(void){
#endif
#ifdef _AIX
FILE *infile;
char buffer[512], *p;
p = (char *)NULL;
infile = popen("prtconf|grep 'Processor Type'", "r");
while (fgets(buffer, sizeof(buffer), infile)){
if (!strncmp("Pro", buffer, 3)){
p = strchr(buffer, ':') + 2;
#if 0
fprintf(stderr, "%s\n", p);
#endif
break;
}
}
pclose(infile);
if (!strncasecmp(p, "POWER3", 6)) return CPUTYPE_POWER3;
if (!strncasecmp(p, "POWER4", 6)) return CPUTYPE_POWER4;
if (!strncasecmp(p, "PPC970", 6)) return CPUTYPE_PPC970;
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER8;
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
return CPUTYPE_POWER5;
#endif
@@ -174,52 +142,6 @@ int detect(void){
return CPUTYPE_PPC970;
#endif
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
int id;
__asm __volatile("mfpvr %0" : "=r"(id));
switch ( id >> 16 ) {
case 0x4e: // POWER9
return CPUTYPE_POWER8;
break;
case 0x4d:
case 0x4b: // POWER8/8E
return CPUTYPE_POWER8;
break;
case 0x4a:
case 0x3f: // POWER7/7E
return CPUTYPE_POWER6;
break;
case 0x3e:
return CPUTYPE_POWER6;
break;
case 0x3a:
return CPUTYPE_POWER5;
break;
case 0x35:
case 0x38: // POWER4 /4+
return CPUTYPE_POWER4;
break;
case 0x40:
case 0x41: // POWER3 /3+
return CPUTYPE_POWER3;
break;
case 0x39:
case 0x3c:
case 0x44:
case 0x45:
return CPUTYPE_PPC970;
break;
case 0x70:
return CPUTYPE_CELL;
break;
case 0x8003:
return CPUTYPE_PPCG4;
break;
default:
return CPUTYPE_UNKNOWN;
}
#endif
}
void get_architecture(void){

View File

@@ -49,7 +49,6 @@ void get_subdirname(void){
}
void get_cpuconfig(void){
printf("#define SPARC\n");
printf("#define V9\n");
printf("#define DTB_DEFAULT_ENTRIES 32\n");
}
@@ -57,8 +56,3 @@ void get_cpuconfig(void){
void get_libname(void){
printf("v9\n");
}
char *get_corename(void){
return "sparc";
}

View File

@@ -50,8 +50,6 @@
#ifdef NO_AVX
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
#define CORE_HASWELL CORE_NEHALEM
#define CPUTYPE_SKYLAKEX CPUTYPE_NEHALEM
#define CORE_SKYLAKEX CORE_NEHALEM
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
#define CORE_SANDYBRIDGE CORE_NEHALEM
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
@@ -73,23 +71,12 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
*edx = cpuInfo[3];
}
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx)
{
int cpuInfo[4] = {-1};
__cpuidex(cpuInfo, op, count);
*eax = cpuInfo[0];
*ebx = cpuInfo[1];
*ecx = cpuInfo[2];
*edx = cpuInfo[3];
}
#else
#ifndef CPUIDEMU
#if defined(__APPLE__) && defined(__i386__)
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx);
#else
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
@@ -103,19 +90,6 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
#endif
}
static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
__asm__ __volatile__
("mov %%ebx, %%edi;"
"cpuid;"
"xchgl %%ebx, %%edi;"
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
#else
__asm__ __volatile__
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc");
#endif
}
#endif
#else
@@ -159,10 +133,6 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
*edx = idlist[current].d;
}
void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) {
return cpuid (op, eax, ebx, ecx, edx);
}
#endif
#endif // _MSC_VER
@@ -342,9 +312,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
cpuid(0, &cpuid_level, &ebx, &ecx, &edx);
if (cpuid_level > 1) {
int numcalls =0 ;
cpuid(2, &eax, &ebx, &ecx, &edx);
numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries
info[ 0] = BITMASK(eax, 8, 0xff);
info[ 1] = BITMASK(eax, 16, 0xff);
info[ 2] = BITMASK(eax, 24, 0xff);
@@ -365,6 +335,7 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
info[14] = BITMASK(edx, 24, 0xff);
for (i = 0; i < 15; i++){
switch (info[i]){
/* This table is from http://www.sandpile.org/ia32/cpuid.htm */
@@ -666,13 +637,12 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LD1.linesize = 64;
break;
case 0x63 :
DTB.size = 2048;
DTB.associative = 4;
DTB.linesize = 32;
LDTB.size = 4096;
LDTB.associative= 4;
LDTB.linesize = 32;
break;
DTB.size = 2048;
DTB.associative = 4;
DTB.linesize = 32;
LDTB.size = 4096;
LDTB.associative= 4;
LDTB.linesize = 32;
case 0x66 :
LD1.size = 8;
LD1.associative = 4;
@@ -705,13 +675,12 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LC1.associative = 8;
break;
case 0x76 :
ITB.size = 2048;
ITB.associative = 0;
ITB.linesize = 8;
LITB.size = 4096;
LITB.associative= 0;
LITB.linesize = 8;
break;
ITB.size = 2048;
ITB.associative = 0;
ITB.linesize = 8;
LITB.size = 4096;
LITB.associative= 0;
LITB.linesize = 8;
case 0x77 :
LC1.size = 16;
LC1.associative = 4;
@@ -922,67 +891,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
}
if (get_vendor() == VENDOR_INTEL) {
if(LD1.size<=0 || LC1.size<=0){
//If we didn't detect L1 correctly before,
int count;
for (count=0;count <4;count++) {
cpuid_count(4, count, &eax, &ebx, &ecx, &edx);
switch (eax &0x1f) {
case 0:
continue;
case 1:
case 3:
{
switch ((eax >>5) &0x07)
{
case 1:
{
// fprintf(stderr,"L1 data cache...\n");
int sets = ecx+1;
int lines = (ebx & 0x0fff) +1;
ebx>>=12;
int part = (ebx&0x03ff)+1;
ebx >>=10;
int assoc = (ebx&0x03ff)+1;
LD1.size = (assoc*part*lines*sets)/1024;
LD1.associative = assoc;
LD1.linesize= lines;
break;
}
default:
break;
}
break;
}
case 2:
{
switch ((eax >>5) &0x07)
{
case 1:
{
// fprintf(stderr,"L1 instruction cache...\n");
int sets = ecx+1;
int lines = (ebx & 0x0fff) +1;
ebx>>=12;
int part = (ebx&0x03ff)+1;
ebx >>=10;
int assoc = (ebx&0x03ff)+1;
LC1.size = (assoc*part*lines*sets)/1024;
LC1.associative = assoc;
LC1.linesize= lines;
break;
}
default:
break;
}
break;
}
default:
break;
}
}
}
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
if (cpuid_level >= 0x80000006) {
if(L2.size<=0){
@@ -1301,19 +1209,6 @@ int get_cpuname(void){
else
return CPUTYPE_NEHALEM;
case 5:
// Skylake X
#ifndef NO_AVX512
return CPUTYPE_SKYLAKEX;
#else
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
#endif
case 14:
// Skylake
if(support_avx())
@@ -1339,23 +1234,6 @@ int get_cpuname(void){
return CPUTYPE_NEHALEM;
}
break;
case 6:
switch (model) {
case 6: // Cannon Lake
#ifndef NO_AVX512
return CPUTYPE_SKYLAKEX;
#else
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_HASWELL;
#else
return CPUTYPE_SANDYBRIDGE;
#endif
else
return CPUTYPE_NEHALEM;
#endif
}
break;
case 9:
case 8:
switch (model) {
@@ -1452,8 +1330,6 @@ int get_cpuname(void){
switch (model) {
case 1:
// AMD Ryzen
case 8:
// AMD Ryzen2
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_ZEN;
@@ -1590,7 +1466,6 @@ static char *cpuname[] = {
"STEAMROLLER",
"EXCAVATOR",
"ZEN",
"SKYLAKEX"
};
static char *lowercpuname[] = {
@@ -1645,11 +1520,10 @@ static char *lowercpuname[] = {
"steamroller",
"excavator",
"zen",
"skylakex"
};
static char *corename[] = {
"UNKNOWN",
"UNKOWN",
"80486",
"P5",
"P6",
@@ -1677,7 +1551,6 @@ static char *corename[] = {
"STEAMROLLER",
"EXCAVATOR",
"ZEN",
"SKYLAKEX"
};
static char *corename_lower[] = {
@@ -1709,7 +1582,6 @@ static char *corename_lower[] = {
"steamroller",
"excavator",
"zen",
"skylakex"
};
@@ -1815,8 +1687,6 @@ int get_coretype(void){
break;
case 3:
switch (model) {
case 7:
return CORE_ATOM;
case 10:
case 14:
if(support_avx())
@@ -1898,19 +1768,6 @@ int get_coretype(void){
else
return CORE_NEHALEM;
case 5:
// Skylake X
#ifndef NO_AVX512
return CORE_SKYLAKEX;
#else
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
#endif
case 14:
// Skylake
if(support_avx())
@@ -2009,8 +1866,6 @@ int get_coretype(void){
switch (model) {
case 1:
// AMD Ryzen
case 8:
// Ryzen 2
if(support_avx())
#ifndef NO_AVX2
return CORE_ZEN;

View File

@@ -29,18 +29,15 @@
#define CPU_GENERIC 0
#define CPU_Z13 1
#define CPU_Z14 2
static char *cpuname[] = {
"ZARCH_GENERIC",
"Z13",
"Z14"
"Z13"
};
static char *cpuname_lower[] = {
"zarch_generic",
"z13",
"z14"
"z13"
};
int detect(void)
@@ -65,10 +62,6 @@ int detect(void)
if (strstr(p, "2964")) return CPU_Z13;
if (strstr(p, "2965")) return CPU_Z13;
/* detect z14, but fall back to z13 */
if (strstr(p, "3906")) return CPU_Z13;
if (strstr(p, "3907")) return CPU_Z13;
return CPU_GENERIC;
}
@@ -114,9 +107,5 @@ void get_cpuconfig(void)
printf("#define Z13\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
break;
case CPU_Z14:
printf("#define Z14\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
break;
}
}

12
ctest.c
View File

@@ -60,14 +60,6 @@ OS_FREEBSD
OS_NETBSD
#endif
#if defined(__OpenBSD__)
OS_OPENBSD
#endif
#if defined(__DragonFly__)
OS_DRAGONFLY
#endif
#if defined(__sun)
OS_SUNOS
#endif
@@ -101,10 +93,6 @@ OS_INTERIX
OS_LINUX
#endif
#if defined(__HAIKU__)
OS_HAIKU
#endif
#if defined(__i386) || defined(_X86)
ARCH_X86
#endif

View File

@@ -1,5 +1,4 @@
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${PROJECT_BINARY_DIR})
enable_language(Fortran)
@@ -16,7 +15,7 @@ foreach(float_type ${FLOAT_TYPES})
add_executable(x${float_char}cblat1
c_${float_char}blat1.f
c_${float_char}blas1.c)
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME})
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}_static)
add_test(NAME "x${float_char}cblat1"
COMMAND "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat1")
@@ -28,7 +27,7 @@ foreach(float_type ${FLOAT_TYPES})
auxiliary.c
c_xerbla.c
constant.c)
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME})
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}_static)
add_test(NAME "x${float_char}cblat2"
COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat2" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in2")
@@ -40,7 +39,7 @@ foreach(float_type ${FLOAT_TYPES})
auxiliary.c
c_xerbla.c
constant.c)
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME})
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}_static)
add_test(NAME "x${float_char}cblat3"
COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat3" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3")

View File

@@ -102,13 +102,7 @@ clean ::
rm -f x*
FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS)
ifeq ($(USE_OPENMP), 1)
ifeq ($(F_COMPILER), GFORTRAN)
ifeq ($(C_COMPILER), CLANG)
CEXTRALIB = -lomp
endif
endif
endif
CEXTRALIB =
# Single real
xscblat1: $(stestl1o) c_sblat1.o $(TOPDIR)/$(LIBNAME)

View File

@@ -9,7 +9,7 @@
#include "common.h"
#include "cblas_test.h"
void F77_caxpy(const int *N, OPENBLAS_CONST void *alpha, void *X,
void F77_caxpy(const int *N, const void *alpha, void *X,
const int *incX, void *Y, const int *incY)
{
cblas_caxpy(*N, alpha, X, *incX, Y, *incY);
@@ -58,13 +58,13 @@ void F77_cswap( const int *N, void *X, const int *incX,
return;
}
int F77_icamax(const int *N, OPENBLAS_CONST void *X, const int *incX)
int F77_icamax(const int *N, const void *X, const int *incX)
{
if (*N < 1 || *incX < 1) return(0);
return (cblas_icamax(*N, X, *incX)+1);
}
float F77_scnrm2(const int *N, OPENBLAS_CONST void *X, const int *incX)
float F77_scnrm2(const int *N, const void *X, const int *incX)
{
return cblas_scnrm2(*N, X, *incX);
}

View File

@@ -9,9 +9,9 @@
#include "cblas_test.h"
void F77_cgemv(int *order, char *transp, int *m, int *n,
OPENBLAS_CONST void *alpha,
CBLAS_TEST_COMPLEX *a, int *lda, OPENBLAS_CONST void *x, int *incx,
OPENBLAS_CONST void *beta, void *y, int *incy) {
const void *alpha,
CBLAS_TEST_COMPLEX *a, int *lda, const void *x, int *incx,
const void *beta, void *y, int *incy) {
CBLAS_TEST_COMPLEX *A;
int i,j,LDA;

View File

@@ -349,13 +349,13 @@
CALL CCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
$ 0 )
$ 0 )
END IF
IF (RORDER) THEN
CALL CCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
$ 1 )
$ 1 )
END IF
GO TO 200
* Test CGERC, 12, CGERU, 13.
@@ -2660,7 +2660,7 @@
50 CONTINUE
END IF
*
C 60 CONTINUE
60 CONTINUE
LCERES = .TRUE.
GO TO 80
70 CONTINUE

View File

@@ -329,13 +329,13 @@
CALL CCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB,
$ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C,
$ 0 )
$ 0 )
END IF
IF (RORDER) THEN
CALL CCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB,
$ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C,
$ 1 )
$ 1 )
END IF
GO TO 190
* Test CHERK, 06, CSYRK, 07.
@@ -357,13 +357,13 @@
CALL CCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET,
$ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
$ 0 )
$ 0 )
END IF
IF (RORDER) THEN
CALL CCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET,
$ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
$ 1 )
$ 1 )
END IF
GO TO 190
*
@@ -707,9 +707,9 @@
9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
C 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
C $ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3,
C $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
$ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3,
$ ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -1033,9 +1033,9 @@ C $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
C 9995 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
C $ ',', F4.1, '), C,', I3, ') .' )
9995 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
$ ',', F4.1, '), C,', I3, ') .' )
9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -1385,9 +1385,9 @@ C $ ',', F4.1, '), C,', I3, ') .' )
9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT(' ******* ', A12,' FAILED ON CALL NUMBER:' )
C 9995 FORMAT(1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ',
C $ ' .' )
9995 FORMAT(1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ',
$ ' .' )
9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -1768,12 +1768,12 @@ C $ ' .' )
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
C 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ',
C $ ' .' )
C 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1,
C $ '), C,', I3, ') .' )
9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ',
$ ' .' )
9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1,
$ '), C,', I3, ') .' )
9992 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -2221,12 +2221,12 @@ C $ '), C,', I3, ') .' )
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
C 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1,
C $ ', C,', I3, ') .' )
C 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
C $ ',', F4.1, '), C,', I3, ') .' )
9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1,
$ ', C,', I3, ') .' )
9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
$ ',', F4.1, '), C,', I3, ') .' )
9992 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -2702,7 +2702,7 @@ C $ ',', F4.1, '), C,', I3, ') .' )
50 CONTINUE
END IF
*
C 60 CONTINUE
60 CONTINUE
LCERES = .TRUE.
GO TO 80
70 CONTINUE

View File

@@ -14,7 +14,7 @@ double F77_dasum(const int *N, double *X, const int *incX)
return cblas_dasum(*N, X, *incX);
}
void F77_daxpy(const int *N, const double *alpha, OPENBLAS_CONST double *X,
void F77_daxpy(const int *N, const double *alpha, const double *X,
const int *incX, double *Y, const int *incY)
{
cblas_daxpy(*N, *alpha, X, *incX, Y, *incY);
@@ -28,13 +28,13 @@ void F77_dcopy(const int *N, double *X, const int *incX,
return;
}
double F77_ddot(const int *N, OPENBLAS_CONST double *X, const int *incX,
OPENBLAS_CONST double *Y, const int *incY)
double F77_ddot(const int *N, const double *X, const int *incX,
const double *Y, const int *incY)
{
return cblas_ddot(*N, X, *incX, Y, *incY);
}
double F77_dnrm2(const int *N, OPENBLAS_CONST double *X, const int *incX)
double F77_dnrm2(const int *N, const double *X, const int *incX)
{
return cblas_dnrm2(*N, X, *incX);
}
@@ -72,12 +72,12 @@ double F77_dzasum(const int *N, void *X, const int *incX)
return cblas_dzasum(*N, X, *incX);
}
double F77_dznrm2(const int *N, OPENBLAS_CONST void *X, const int *incX)
double F77_dznrm2(const int *N, const void *X, const int *incX)
{
return cblas_dznrm2(*N, X, *incX);
}
int F77_idamax(const int *N, OPENBLAS_CONST double *X, const int *incX)
int F77_idamax(const int *N, const double *X, const int *incX)
{
if (*N < 1 || *incX < 1) return(0);
return (cblas_idamax(*N, X, *incX)+1);

View File

@@ -211,11 +211,11 @@
IF (ICASE.EQ.7) THEN
* .. DNRM2TEST ..
STEMP(1) = DTRUE1(NP1)
CALL STEST1(DNRM2TEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
CALL STEST1(DNRM2TEST(N,SX,INCX),STEMP,STEMP,SFAC)
ELSE IF (ICASE.EQ.8) THEN
* .. DASUMTEST ..
STEMP(1) = DTRUE3(NP1)
CALL STEST1(DASUMTEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
CALL STEST1(DASUMTEST(N,SX,INCX),STEMP,STEMP,SFAC)
ELSE IF (ICASE.EQ.9) THEN
* .. DSCALTEST ..
CALL DSCALTEST(N,SA((INCX-1)*5+NP1),SX,INCX)

View File

@@ -345,13 +345,13 @@
CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
$ 0 )
$ 0 )
END IF
IF (RORDER) THEN
CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
$ 1 )
$ 1 )
END IF
GO TO 200
* Test DGER, 12.
@@ -797,9 +797,9 @@
$ ' (', I6, ' CALL', 'S)' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
C $ ' - SUSPECT *******' )
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
$ ' - SUSPECT *******' )
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
9995 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', 4( I3, ',' ), F4.1,
$ ', A,', I3, ',',/ 10x,'X,', I2, ',', F4.1, ', Y,',
@@ -1004,7 +1004,7 @@ C $ ' - SUSPECT *******' )
$ REWIND NTRA
CALL CDSBMV( IORDER, UPLO, N, K, ALPHA,
$ AA, LDA, XX, INCX, BETA, YY,
$ INCY )
$ INCY )
ELSE IF( PACKED )THEN
IF( TRACE )
$ WRITE( NTRA, FMT = 9995 )NC, SNAME,
@@ -1156,9 +1156,9 @@ C $ ' - SUSPECT *******' )
$ ' (', I6, ' CALL', 'S)' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
C $ ' - SUSPECT *******' )
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
$ ' - SUSPECT *******' )
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
9995 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', AP',
$ ', X,', I2, ',', F4.1, ', Y,', I2, ') .' )
@@ -1191,7 +1191,7 @@ C $ ' - SUSPECT *******' )
* .. Scalar Arguments ..
DOUBLE PRECISION EPS, THRESH
INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA,
$ IORDER
$ IORDER
LOGICAL FATAL, REWI, TRACE
CHARACTER*12 SNAME
* .. Array Arguments ..
@@ -1216,7 +1216,7 @@ C $ ' - SUSPECT *******' )
EXTERNAL LDE, LDERES
* .. External Subroutines ..
EXTERNAL DMAKE, DMVCH, CDTBMV, CDTBSV, CDTPMV,
$ CDTPSV, CDTRMV, CDTRSV
$ CDTPSV, CDTRMV, CDTRSV
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX
* .. Scalars in Common ..
@@ -1544,9 +1544,9 @@ C $ ' - SUSPECT *******' )
$ ' (', I6, ' CALL', 'S)' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
C $ ' - SUSPECT *******' )
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
$ ' - SUSPECT *******' )
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
9995 FORMAT( 1X, I6, ': ',A12, '(', 3( A14,',' ),/ 10x, I3, ', AP, ',
$ 'X,', I2, ') .' )
@@ -1579,7 +1579,7 @@ C $ ' - SUSPECT *******' )
* .. Scalar Arguments ..
DOUBLE PRECISION EPS, THRESH
INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA,
$ IORDER
$ IORDER
LOGICAL FATAL, REWI, TRACE
CHARACTER*12 SNAME
* .. Array Arguments ..
@@ -1819,9 +1819,9 @@ C $ ' - SUSPECT *******' )
$ ' (', I6, ' CALL', 'S)' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
C $ ' - SUSPECT *******' )
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
$ ' - SUSPECT *******' )
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
9994 FORMAT( 1X, I6, ': ',A12, '(', 2( I3, ',' ), F4.1, ', X,', I2,
@@ -1851,7 +1851,7 @@ C $ ' - SUSPECT *******' )
* .. Scalar Arguments ..
DOUBLE PRECISION EPS, THRESH
INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA,
$ IORDER
$ IORDER
LOGICAL FATAL, REWI, TRACE
CHARACTER*12 SNAME
* .. Array Arguments ..
@@ -1973,7 +1973,7 @@ C $ ' - SUSPECT *******' )
IF( REWI )
$ REWIND NTRA
CALL CDSYR( IORDER, UPLO, N, ALPHA, XX, INCX,
$ AA, LDA )
$ AA, LDA )
ELSE IF( PACKED )THEN
IF( TRACE )
$ WRITE( NTRA, FMT = 9994 )NC, SNAME, CUPLO, N,
@@ -2113,9 +2113,9 @@ C $ ' - SUSPECT *******' )
$ ' (', I6, ' CALL', 'S)' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
C $ ' - SUSPECT *******' )
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
$ ' - SUSPECT *******' )
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
9994 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', X,',
@@ -2147,7 +2147,7 @@ C $ ' - SUSPECT *******' )
* .. Scalar Arguments ..
DOUBLE PRECISION EPS, THRESH
INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA,
$ IORDER
$ IORDER
LOGICAL FATAL, REWI, TRACE
CHARACTER*12 SNAME
* .. Array Arguments ..
@@ -2445,9 +2445,9 @@ C $ ' - SUSPECT *******' )
$ ' (', I6, ' CALL', 'S)' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
C $ ' - SUSPECT *******' )
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
$ ' - SUSPECT *******' )
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
9994 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', X,',
@@ -2833,7 +2833,7 @@ C $ ' - SUSPECT *******' )
50 CONTINUE
END IF
*
C 60 CONTINUE
60 CONTINUE
LDERES = .TRUE.
GO TO 80
70 CONTINUE

View File

@@ -56,7 +56,7 @@
* .. Local Scalars ..
DOUBLE PRECISION EPS, ERR, THRESH
INTEGER I, ISNUM, J, N, NALF, NBET, NIDIM, NTRA,
$ LAYOUT
$ LAYOUT
LOGICAL FATAL, LTESTT, REWI, SAME, SFATAL, TRACE,
$ TSTERR, CORDER, RORDER
CHARACTER*1 TRANSA, TRANSB
@@ -78,7 +78,7 @@
EXTERNAL DDIFF, LDE
* .. External Subroutines ..
EXTERNAL DCHK1, DCHK2, DCHK3, DCHK4, DCHK5, CD3CHKE,
$ DMMCH
$ DMMCH
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
* .. Scalars in Common ..
@@ -323,13 +323,13 @@
CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB,
$ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C,
$ 0 )
$ 0 )
END IF
IF (RORDER) THEN
CALL DCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB,
$ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C,
$ 1 )
$ 1 )
END IF
GO TO 190
* Test DSYRK, 05.
@@ -351,13 +351,13 @@
CALL DCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET,
$ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
$ 0 )
$ 0 )
END IF
IF (RORDER) THEN
CALL DCHK5( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET,
$ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
$ 1 )
$ 1 )
END IF
GO TO 190
*
@@ -588,7 +588,7 @@
$ REWIND NTRA
CALL CDGEMM( IORDER, TRANSA, TRANSB, M, N,
$ K, ALPHA, AA, LDA, BB, LDB,
$ BETA, CC, LDC )
$ BETA, CC, LDC )
*
* Check if error-exit was taken incorrectly.
*
@@ -694,9 +694,9 @@
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
C 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
C $ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ',
C $ 'C,', I3, ').' )
9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
$ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ',
$ 'C,', I3, ').' )
9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -1007,9 +1007,9 @@ C $ 'C,', I3, ').' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
C 9995 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
C $ ' .' )
9995 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
$ ' .' )
9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -1201,7 +1201,7 @@ C $ ' .' )
$ REWIND NTRA
CALL CDTRMM( IORDER, SIDE, UPLO, TRANSA,
$ DIAG, M, N, ALPHA, AA, LDA,
$ BB, LDB )
$ BB, LDB )
ELSE IF( SNAME( 10: 11 ).EQ.'sm' )THEN
IF( TRACE )
$ CALL DPRCN3( NTRA, NC, SNAME, IORDER,
@@ -1211,7 +1211,7 @@ C $ ' .' )
$ REWIND NTRA
CALL CDTRSM( IORDER, SIDE, UPLO, TRANSA,
$ DIAG, M, N, ALPHA, AA, LDA,
$ BB, LDB )
$ BB, LDB )
END IF
*
* Check if error-exit was taken incorrectly.
@@ -1355,8 +1355,8 @@ C $ ' .' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
C 9995 FORMAT( 1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
C $ F4.1, ', A,', I3, ', B,', I3, ') .' )
9995 FORMAT( 1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
$ F4.1, ', A,', I3, ', B,', I3, ') .' )
9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -1681,8 +1681,8 @@ C $ F4.1, ', A,', I3, ', B,', I3, ') .' )
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
C 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -1726,7 +1726,7 @@ C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
SUBROUTINE DCHK5( SNAME, EPS, THRESH, NOUT, NTRA, TRACE, REWI,
$ FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET, NMAX,
$ AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
$ IORDER )
$ IORDER )
*
* Tests DSYR2K.
*
@@ -1888,7 +1888,7 @@ C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
$ REWIND NTRA
CALL CDSYR2K( IORDER, UPLO, TRANS, N, K,
$ ALPHA, AA, LDA, BB, LDB, BETA,
$ CC, LDC )
$ CC, LDC )
*
* Check if error-exit was taken incorrectly.
*
@@ -2037,9 +2037,9 @@ C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
C 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
C $ ' .' )
9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
$ ' .' )
9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -2399,7 +2399,7 @@ C $ ' .' )
50 CONTINUE
END IF
*
C 60 CONTINUE
60 CONTINUE
LDERES = .TRUE.
GO TO 80
70 CONTINUE

View File

@@ -14,7 +14,7 @@ float F77_sasum(blasint *N, float *X, blasint *incX)
return cblas_sasum(*N, X, *incX);
}
void F77_saxpy(blasint *N, const float *alpha, OPENBLAS_CONST float *X,
void F77_saxpy(blasint *N, const float *alpha, const float *X,
blasint *incX, float *Y, blasint *incY)
{
cblas_saxpy(*N, *alpha, X, *incX, Y, *incY);
@@ -26,25 +26,25 @@ float F77_scasum(blasint *N, float *X, blasint *incX)
return cblas_scasum(*N, X, *incX);
}
float F77_scnrm2(blasint *N, OPENBLAS_CONST float *X, blasint *incX)
float F77_scnrm2(blasint *N, const float *X, blasint *incX)
{
return cblas_scnrm2(*N, X, *incX);
}
void F77_scopy(blasint *N, OPENBLAS_CONST float *X, blasint *incX,
void F77_scopy(blasint *N, const float *X, blasint *incX,
float *Y, blasint *incY)
{
cblas_scopy(*N, X, *incX, Y, *incY);
return;
}
float F77_sdot(blasint *N, OPENBLAS_CONST float *X, blasint *incX,
OPENBLAS_CONST float *Y, blasint *incY)
float F77_sdot(blasint *N, const float *X, blasint *incX,
const float *Y, blasint *incY)
{
return cblas_sdot(*N, X, *incX, Y, *incY);
}
float F77_snrm2(blasint *N, OPENBLAS_CONST float *X, blasint *incX)
float F77_snrm2(blasint *N, const float *X, blasint *incX)
{
return cblas_snrm2(*N, X, *incX);
}
@@ -76,7 +76,7 @@ void F77_sswap( blasint *N, float *X, blasint *incX,
return;
}
int F77_isamax(blasint *N, OPENBLAS_CONST float *X, blasint *incX)
int F77_isamax(blasint *N, const float *X, blasint *incX)
{
if (*N < 1 || *incX < 1) return(0);
return (cblas_isamax(*N, X, *incX)+1);

View File

@@ -211,11 +211,11 @@
IF (ICASE.EQ.7) THEN
* .. SNRM2TEST ..
STEMP(1) = DTRUE1(NP1)
CALL STEST1(SNRM2TEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
CALL STEST1(SNRM2TEST(N,SX,INCX),STEMP,STEMP,SFAC)
ELSE IF (ICASE.EQ.8) THEN
* .. SASUMTEST ..
STEMP(1) = DTRUE3(NP1)
CALL STEST1(SASUMTEST(N,SX,INCX),STEMP(1),STEMP,SFAC)
CALL STEST1(SASUMTEST(N,SX,INCX),STEMP,STEMP,SFAC)
ELSE IF (ICASE.EQ.9) THEN
* .. SSCALTEST ..
CALL SSCALTEST(N,SA((INCX-1)*5+NP1),SX,INCX)

View File

@@ -345,13 +345,13 @@
CALL SCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
$ 0 )
$ 0 )
END IF
IF (RORDER) THEN
CALL SCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
$ 1 )
$ 1 )
END IF
GO TO 200
* Test SGER, 12.
@@ -797,9 +797,9 @@
$ ' (', I6, ' CALL', 'S)' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
C $ ' - SUSPECT *******' )
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
$ ' - SUSPECT *******' )
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
9995 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', 4( I3, ',' ), F4.1,
$ ', A,', I3, ',',/ 10x, 'X,', I2, ',', F4.1, ', Y,',
@@ -1004,7 +1004,7 @@ C $ ' - SUSPECT *******' )
$ REWIND NTRA
CALL CSSBMV( IORDER, UPLO, N, K, ALPHA,
$ AA, LDA, XX, INCX, BETA, YY,
$ INCY )
$ INCY )
ELSE IF( PACKED )THEN
IF( TRACE )
$ WRITE( NTRA, FMT = 9995 )NC, SNAME,
@@ -1156,9 +1156,9 @@ C $ ' - SUSPECT *******' )
$ ' (', I6, ' CALL', 'S)' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
C $ ' - SUSPECT *******' )
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
$ ' - SUSPECT *******' )
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
9995 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', AP',
$ ', X,', I2, ',', F4.1, ', Y,', I2, ') .' )
@@ -1191,7 +1191,7 @@ C $ ' - SUSPECT *******' )
* .. Scalar Arguments ..
REAL EPS, THRESH
INTEGER INCMAX, NIDIM, NINC, NKB, NMAX, NOUT, NTRA,
$ IORDER
$ IORDER
LOGICAL FATAL, REWI, TRACE
CHARACTER*12 SNAME
* .. Array Arguments ..
@@ -1216,7 +1216,7 @@ C $ ' - SUSPECT *******' )
EXTERNAL LSE, LSERES
* .. External Subroutines ..
EXTERNAL SMAKE, SMVCH, CSTBMV, CSTBSV, CSTPMV,
$ CSTPSV, CSTRMV, CSTRSV
$ CSTPSV, CSTRMV, CSTRSV
* .. Intrinsic Functions ..
INTRINSIC ABS, MAX
* .. Scalars in Common ..
@@ -1544,9 +1544,9 @@ C $ ' - SUSPECT *******' )
$ ' (', I6, ' CALL', 'S)' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
C $ ' - SUSPECT *******' )
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
$ ' - SUSPECT *******' )
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
9995 FORMAT( 1X, I6, ': ',A12, '(', 3( A14,',' ),/ 10x, I3, ', AP, ',
$ 'X,', I2, ') .' )
@@ -1579,7 +1579,7 @@ C $ ' - SUSPECT *******' )
* .. Scalar Arguments ..
REAL EPS, THRESH
INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA,
$ IORDER
$ IORDER
LOGICAL FATAL, REWI, TRACE
CHARACTER*12 SNAME
* .. Array Arguments ..
@@ -1819,9 +1819,9 @@ C $ ' - SUSPECT *******' )
$ ' (', I6, ' CALL', 'S)' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
C $ ' - SUSPECT *******' )
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
$ ' - SUSPECT *******' )
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
9994 FORMAT( 1X, I6, ': ',A12, '(', 2( I3, ',' ), F4.1, ', X,', I2,
@@ -1851,7 +1851,7 @@ C $ ' - SUSPECT *******' )
* .. Scalar Arguments ..
REAL EPS, THRESH
INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA,
$ IORDER
$ IORDER
LOGICAL FATAL, REWI, TRACE
CHARACTER*12 SNAME
* .. Array Arguments ..
@@ -1973,7 +1973,7 @@ C $ ' - SUSPECT *******' )
IF( REWI )
$ REWIND NTRA
CALL CSSYR( IORDER, UPLO, N, ALPHA, XX, INCX,
$ AA, LDA )
$ AA, LDA )
ELSE IF( PACKED )THEN
IF( TRACE )
$ WRITE( NTRA, FMT = 9994 )NC, SNAME, CUPLO, N,
@@ -2113,9 +2113,9 @@ C $ ' - SUSPECT *******' )
$ ' (', I6, ' CALL', 'S)' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
C $ ' - SUSPECT *******' )
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
$ ' - SUSPECT *******' )
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
9994 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', X,',
@@ -2147,7 +2147,7 @@ C $ ' - SUSPECT *******' )
* .. Scalar Arguments ..
REAL EPS, THRESH
INTEGER INCMAX, NALF, NIDIM, NINC, NMAX, NOUT, NTRA,
$ IORDER
$ IORDER
LOGICAL FATAL, REWI, TRACE
CHARACTER*12 SNAME
* .. Array Arguments ..
@@ -2445,9 +2445,9 @@ C $ ' - SUSPECT *******' )
$ ' (', I6, ' CALL', 'S)' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
C 9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
C $ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
C $ ' - SUSPECT *******' )
9997 FORMAT( ' ',A12, ' COMPLETED THE COMPUTATIONAL TESTS (', I6, ' C',
$ 'ALLS)', /' ******* BUT WITH MAXIMUM TEST RATIO', F8.2,
$ ' - SUSPECT *******' )
9996 FORMAT( ' ******* ',A12, ' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
9994 FORMAT( 1X, I6, ': ',A12, '(', A14, ',', I3, ',', F4.1, ', X,',
@@ -2833,7 +2833,7 @@ C $ ' - SUSPECT *******' )
50 CONTINUE
END IF
*
C 60 CONTINUE
60 CONTINUE
LSERES = .TRUE.
GO TO 80
70 CONTINUE

View File

@@ -694,9 +694,9 @@
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
C 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
C $ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ',
C $ 'C,', I3, ').' )
9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
$ 3( I3, ',' ), F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', ',
$ 'C,', I3, ').' )
9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -1011,9 +1011,9 @@ C $ 'C,', I3, ').' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
C 9995 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
C $ ' .' )
9995 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
$ ' .' )
9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -1359,8 +1359,8 @@ C $ ' .' )
9998 FORMAT( ' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
C 9995 FORMAT( 1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
C $ F4.1, ', A,', I3, ', B,', I3, ') .' )
9995 FORMAT( 1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
$ F4.1, ', A,', I3, ', B,', I3, ') .' )
9994 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -1686,8 +1686,8 @@ C $ F4.1, ', A,', I3, ', B,', I3, ') .' )
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
C 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -2041,9 +2041,9 @@ C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') .' )
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
C 9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
C $ ' .' )
9994 FORMAT( 1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ F4.1, ', A,', I3, ', B,', I3, ',', F4.1, ', C,', I3, ') ',
$ ' .' )
9993 FORMAT( ' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -2403,7 +2403,7 @@ C $ ' .' )
50 CONTINUE
END IF
*
C 60 CONTINUE
60 CONTINUE
LSERES = .TRUE.
GO TO 80
70 CONTINUE

View File

@@ -131,7 +131,7 @@ void F77_xerbla(char *srname, void *vinfo)
int BLASFUNC(xerbla)(char *name, blasint *info, blasint length) {
F77_xerbla(name, info);
return 0;
};

View File

@@ -9,7 +9,7 @@
#include "common.h"
#include "cblas_test.h"
void F77_zaxpy(const int *N, OPENBLAS_CONST void *alpha, void *X,
void F77_zaxpy(const int *N, const void *alpha, void *X,
const int *incX, void *Y, const int *incY)
{
cblas_zaxpy(*N, alpha, X, *incX, Y, *incY);
@@ -23,8 +23,8 @@ void F77_zcopy(const int *N, void *X, const int *incX,
return;
}
void F77_zdotc(const int *N, OPENBLAS_CONST void *X, const int *incX,
OPENBLAS_CONST void *Y, const int *incY,void *dotc)
void F77_zdotc(const int *N, const void *X, const int *incX,
const void *Y, const int *incY,void *dotc)
{
cblas_zdotc_sub(*N, X, *incX, Y, *incY, dotc);
return;
@@ -58,13 +58,13 @@ void F77_zswap( const int *N, void *X, const int *incX,
return;
}
int F77_izamax(const int *N, OPENBLAS_CONST void *X, const int *incX)
int F77_izamax(const int *N, const void *X, const int *incX)
{
if (*N < 1 || *incX < 1) return(0);
return(cblas_izamax(*N, X, *incX)+1);
}
double F77_dznrm2(const int *N, OPENBLAS_CONST void *X, const int *incX)
double F77_dznrm2(const int *N, const void *X, const int *incX)
{
return cblas_dznrm2(*N, X, *incX);
}

View File

@@ -9,9 +9,9 @@
#include "cblas_test.h"
void F77_zgemv(int *order, char *transp, int *m, int *n,
OPENBLAS_CONST void *alpha,
CBLAS_TEST_ZOMPLEX *a, int *lda, OPENBLAS_CONST void *x, int *incx,
OPENBLAS_CONST void *beta, void *y, int *incy) {
const void *alpha,
CBLAS_TEST_ZOMPLEX *a, int *lda, const void *x, int *incx,
const void *beta, void *y, int *incy) {
CBLAS_TEST_ZOMPLEX *A;
int i,j,LDA;

View File

@@ -349,13 +349,13 @@
CALL ZCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
$ 0 )
$ 0 )
END IF
IF (RORDER) THEN
CALL ZCHK3( SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NKB, KB, NINC, INC,
$ NMAX, INCMAX, A, AA, AS, Y, YY, YS, YT, G, Z,
$ 1 )
$ 1 )
END IF
GO TO 200
* Test ZGERC, 12, ZGERU, 13.

View File

@@ -330,13 +330,13 @@
CALL ZCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB,
$ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C,
$ 0 )
$ 0 )
END IF
IF (RORDER) THEN
CALL ZCHK3(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NMAX, AB,
$ AA, AS, AB( 1, NMAX + 1 ), BB, BS, CT, G, C,
$ 1 )
$ 1 )
END IF
GO TO 190
* Test ZHERK, 06, ZSYRK, 07.
@@ -358,13 +358,13 @@
CALL ZCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET,
$ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
$ 0 )
$ 0 )
END IF
IF (RORDER) THEN
CALL ZCHK5(SNAMES( ISNUM ), EPS, THRESH, NOUT, NTRA, TRACE,
$ REWI, FATAL, NIDIM, IDIM, NALF, ALF, NBET, BET,
$ NMAX, AB, AA, AS, BB, BS, C, CC, CS, CT, G, W,
$ 1 )
$ 1 )
END IF
GO TO 190
*
@@ -708,9 +708,9 @@
9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
C 9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
C $ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3,
C $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
9995 FORMAT( 1X, I6, ': ', A12,'(''', A1, ''',''', A1, ''',',
$ 3( I3, ',' ), '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3,
$ ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -1034,9 +1034,9 @@ C $ ',(', F4.1, ',', F4.1, '), C,', I3, ').' )
9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
C 9995 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
C $ ',', F4.1, '), C,', I3, ') .' )
9995 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
$ ',', F4.1, '), C,', I3, ') .' )
9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -1386,9 +1386,9 @@ C $ ',', F4.1, '), C,', I3, ') .' )
9998 FORMAT(' ******* FATAL ERROR - PARAMETER NUMBER ', I2, ' WAS CH',
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT(' ******* ', A12,' FAILED ON CALL NUMBER:' )
C 9995 FORMAT(1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ',
C $ ' .' )
9995 FORMAT(1X, I6, ': ', A12,'(', 4( '''', A1, ''',' ), 2( I3, ',' ),
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ') ',
$ ' .' )
9994 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -1769,12 +1769,12 @@ C $ ' .' )
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
C 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ',
C $ ' .' )
C 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1,
C $ '), C,', I3, ') .' )
9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ F4.1, ', A,', I3, ',', F4.1, ', C,', I3, ') ',
$ ' .' )
9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ '(', F4.1, ',', F4.1, ') , A,', I3, ',(', F4.1, ',', F4.1,
$ '), C,', I3, ') .' )
9992 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -2222,12 +2222,12 @@ C $ '), C,', I3, ') .' )
$ 'ANGED INCORRECTLY *******' )
9996 FORMAT( ' ******* ', A12,' FAILED ON CALL NUMBER:' )
9995 FORMAT( ' THESE ARE THE RESULTS FOR COLUMN ', I3 )
C 9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1,
C $ ', C,', I3, ') .' )
C 9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
C $ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
C $ ',', F4.1, '), C,', I3, ') .' )
9994 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',', F4.1,
$ ', C,', I3, ') .' )
9993 FORMAT(1X, I6, ': ', A12,'(', 2( '''', A1, ''',' ), 2( I3, ',' ),
$ '(', F4.1, ',', F4.1, '), A,', I3, ', B,', I3, ',(', F4.1,
$ ',', F4.1, '), C,', I3, ') .' )
9992 FORMAT(' ******* FATAL ERROR - ERROR-EXIT TAKEN ON VALID CALL *',
$ '******' )
*
@@ -2706,7 +2706,7 @@ C $ ',', F4.1, '), C,', I3, ') .' )
50 CONTINUE
END IF
*
C 60 CONTINUE
60 CONTINUE
LZERES = .TRUE.
GO TO 80
70 CONTINUE

View File

@@ -1,6 +1,5 @@
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${PROJECT_BINARY_DIR})
# sources that need to be compiled twice, once with no flags and once with LOWER
set(UL_SOURCES
@@ -73,7 +72,7 @@ GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2)
# special defines for complex
foreach (float_type ${FLOAT_TYPES})
if (USE_THREAD)
if (SMP)
GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false ${float_type})
GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false ${float_type})
@@ -107,7 +106,7 @@ foreach (float_type ${FLOAT_TYPES})
GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type})
endforeach()
if (USE_THREAD)
if (SMP)
GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type})
GenerateNamedObjects("gemv_thread.c" "CONJ;TRANSA" "gemv_thread_c" false "" "" false ${float_type})
@@ -186,7 +185,7 @@ foreach (float_type ${FLOAT_TYPES})
GenerateCombinationObjects("${l_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TU" false ${float_type})
endforeach ()
if (USE_THREAD)
if (SMP)
GenerateNamedObjects("ger_thread.c" "" "" false "" "" false ${float_type})
foreach(nu_smp_source ${NU_SMP_SOURCES})
string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_source})
@@ -197,7 +196,7 @@ foreach (float_type ${FLOAT_TYPES})
endif ()
endforeach ()
if (USE_THREAD)
if (SMP)
GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2)
endif ()

View File

@@ -62,13 +62,13 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha,
if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + M * sizeof(FLOAT) + 4095) & ~4095);
// gemvbuffer = bufferX;
gemvbuffer = bufferX;
COPY_K(M, y, incy, Y, 1);
}
if (incx != 1) {
X = bufferX;
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) + 4095) & ~4095);
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(N, x, incx, X, 1);
}

View File

@@ -96,7 +96,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
COPY_K(args -> m, x, incx, buffer, 1);
x = buffer;
// buffer += ((COMPSIZE * args -> m + 1023) & ~1023);
buffer += ((COMPSIZE * args -> m + 1023) & ~1023);
}
#endif
@@ -230,10 +230,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT
#ifndef TRANSA
range_m[num_cpu] = num_cpu * ((m + 15) & ~15);
if (range_m[num_cpu] > m * num_cpu) range_m[num_cpu] = m * num_cpu;
#else
range_m[num_cpu] = num_cpu * ((n + 15) & ~15);
if (range_m[num_cpu] > n * num_cpu) range_m[num_cpu] = n * num_cpu;
#endif
queue[num_cpu].mode = mode;

View File

@@ -62,36 +62,9 @@
#endif
#endif
#ifndef thread_local
# if __STDC_VERSION__ >= 201112 && !defined __STDC_NO_THREADS__
# define thread_local _Thread_local
# elif defined _WIN32 && ( \
defined _MSC_VER || \
defined __ICL || \
defined __DMC__ || \
defined __BORLANDC__ )
# define thread_local __declspec(thread)
/* note that ICC (linux) and Clang are covered by __GNUC__ */
# elif defined __GNUC__ || \
defined __SUNPRO_C || \
defined __xlC__
# define thread_local __thread
# else
# define UNSAFE
#endif
#endif
#if defined USE_OPENMP
#undef UNSAFE
#endif
#if !defined(TRANSA) && !defined(UNSAFE)
#ifndef TRANSA
#define Y_DUMMY_NUM 1024
#if defined(USE_OPENMP)
static FLOAT y_dummy[Y_DUMMY_NUM];
#pragma omp threadprivate(y_dummy)
# else
static thread_local FLOAT y_dummy[Y_DUMMY_NUM];
# endif
#endif
static int gemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){
@@ -132,12 +105,10 @@ static int gemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#ifdef TRANSA
y += n_from * incy * COMPSIZE;
#else
# ifndef UNSAFE
//for split matrix row (n) direction and vector x of gemv_n
x += n_from * incx * COMPSIZE;
//store partial result for every thread
y += (m_to - m_from) * 1 * COMPSIZE * pos;
# endif
#endif
}
@@ -165,7 +136,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
BLASLONG width, i, num_cpu;
#if !defined(TRANSA) && !defined(UNSAFE)
#ifndef TRANSA
int split_x=0;
#endif
@@ -241,7 +212,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
i -= width;
}
#if !defined(TRANSA) && !defined(UNSAFE)
#ifndef TRANSA
//try to split matrix on row direction and x.
//Then, reduction.
if (num_cpu < nthreads) {
@@ -301,7 +272,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
exec_blas(num_cpu, queue);
}
#if !defined(TRANSA) && !defined(UNSAFE)
#ifndef TRANSA
if(split_x==1){
//reduction
for(i=0; i<num_cpu; i++){

View File

@@ -55,13 +55,13 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha,
if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) + 4095) & ~4095);
// sbmvbuffer = bufferX;
sbmvbuffer = bufferX;
COPY_K(n, y, incy, Y, 1);
}
if (incx != 1) {
X = bufferX;
// sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) + 4095) & ~4095);
sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(n, x, incx, X, 1);
}

View File

@@ -91,7 +91,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
COPY_K(n, x, incx, buffer, 1);
x = buffer;
// buffer += ((COMPSIZE * n + 1023) & ~1023);
buffer += ((COMPSIZE * n + 1023) & ~1023);
}
SCAL_K(n, 0, 0, ZERO,
@@ -246,7 +246,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel;
@@ -286,7 +285,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel;
@@ -318,7 +316,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * ((n + 15) & ~15);
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel;

View File

@@ -53,13 +53,13 @@ int CNAME(BLASLONG m, FLOAT alpha, FLOAT *a,
if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) + 4095) & ~4095);
// gemvbuffer = bufferX;
gemvbuffer = bufferX;
COPY_K(m, y, incy, Y, 1);
}
if (incx != 1) {
X = bufferX;
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) + 4095) & ~4095);
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) + 4095) & ~4095);
COPY_K(m, x, incx, X, 1);
}

View File

@@ -246,7 +246,6 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = spmv_kernel;
@@ -286,7 +285,6 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = spmv_kernel;

View File

@@ -177,8 +177,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[MAX_CPU_NUMBER - num_cpu - 1].mode = mode;
queue[MAX_CPU_NUMBER - num_cpu - 1].routine = symv_kernel;
queue[MAX_CPU_NUMBER - num_cpu - 1].args = &args;
@@ -226,7 +225,6 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = symv_kernel;

View File

@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
// const static FLOAT dp1 = 1.;
const static FLOAT dp1 = 1.;
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

View File

@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
// const static FLOAT dp1 = 1.;
const static FLOAT dp1 = 1.;
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

View File

@@ -107,7 +107,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
COPY_K(args -> n, x, incx, buffer, 1);
x = buffer;
// buffer += ((args -> n * COMPSIZE + 1023) & ~1023);
buffer += ((args -> n * COMPSIZE + 1023) & ~1023);
}
if (range_n) y += *range_n * COMPSIZE;
@@ -288,7 +288,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
@@ -328,7 +327,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
@@ -358,7 +356,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n * num_cpu) range_n[num_cpu] = n * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;

View File

@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
// const static FLOAT dp1 = 1.;
const static FLOAT dp1 = 1.;
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

View File

@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
// const static FLOAT dp1 = 1.;
const static FLOAT dp1 = 1.;
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

View File

@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
// const static FLOAT dp1 = 1.;
const static FLOAT dp1 = 1.;
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){

View File

@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
// const static FLOAT dp1 = 1.;
const static FLOAT dp1 = 1.;
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){

View File

@@ -112,7 +112,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
#endif
x = buffer;
// buffer += ((COMPSIZE * args -> m + 1023) & ~1023);
buffer += ((COMPSIZE * args -> m + 1023) & ~1023);
}
#ifndef TRANS
@@ -234,7 +234,11 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
return 0;
}
#ifndef COMPLEX
int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthreads){
#else
int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthreads){
#endif
blas_arg_t args;
blas_queue_t queue[MAX_CPU_NUMBER];
@@ -303,8 +307,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = tpmv_kernel;
queue[num_cpu].args = &args;
@@ -343,7 +346,6 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = tpmv_kernel;

View File

@@ -59,7 +59,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
min_i = MIN(m - is, DTB_ENTRIES);
#ifndef TRANSA
if (is > 0){
if (is > 0){
GEMV_N(is, min_i, 0, dp1,
a + is * lda, lda,
B + is, 1,

View File

@@ -346,7 +346,6 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
@@ -386,7 +385,6 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;

View File

@@ -83,13 +83,13 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA
if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + M * sizeof(FLOAT) * 2 + 4095) & ~4095);
// gemvbuffer = bufferX;
gemvbuffer = bufferX;
COPY_K(M, y, incy, Y, 1);
}
if (incx != 1) {
X = bufferX;
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) * 2 + 4095) & ~4095);
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + N * sizeof(FLOAT) * 2 + 4095) & ~4095);
COPY_K(N, x, incx, X, 1);
}

View File

@@ -61,13 +61,13 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
// sbmvbuffer = bufferX;
sbmvbuffer = bufferX;
COPY_K(n, y, incy, Y, 1);
}
if (incx != 1) {
X = bufferX;
// sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
COPY_K(n, x, incx, X, 1);
}

View File

@@ -56,13 +56,13 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
// gemvbuffer = bufferX;
gemvbuffer = bufferX;
COPY_K(m, y, incy, Y, 1);
}
if (incx != 1) {
X = bufferX;
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
COPY_K(m, x, incx, X, 1);
}

View File

@@ -60,13 +60,13 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i,
if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
// sbmvbuffer = bufferX;
sbmvbuffer = bufferX;
COPY_K(n, y, incy, Y, 1);
}
if (incx != 1) {
X = bufferX;
// sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
sbmvbuffer = (FLOAT *)(((BLASLONG)bufferX + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
COPY_K(n, x, incx, X, 1);
}

View File

@@ -55,13 +55,13 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i,
if (incy != 1) {
Y = bufferY;
bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
// gemvbuffer = bufferX;
gemvbuffer = bufferX;
COPY_K(m, y, incy, Y, 1);
}
if (incx != 1) {
X = bufferX;
// gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
gemvbuffer = (FLOAT *)(((BLASLONG)bufferX + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
COPY_K(m, x, incx, X, 1);
}

View File

@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
// const static FLOAT dp1 = 1.;
const static FLOAT dp1 = 1.;
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

View File

@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
// const static FLOAT dp1 = 1.;
const static FLOAT dp1 = 1.;
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

View File

@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
// const static FLOAT dp1 = 1.;
const static FLOAT dp1 = 1.;
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

View File

@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
// const static FLOAT dp1 = 1.;
const static FLOAT dp1 = 1.;
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){

View File

@@ -40,7 +40,7 @@
#include <ctype.h>
#include "common.h"
// const static FLOAT dm1 = -1.;
const static FLOAT dm1 = -1.;
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){

Some files were not shown because too many files have changed in this diff Show More