Merge branch 'cmake' of https://github.com/hpanderson/OpenBLAS into hpanderson_cmake

This commit is contained in:
Zhang Xianyi 2015-07-22 04:06:39 +08:00
commit dcd5ba4443
36 changed files with 3206 additions and 52 deletions

2
.gitignore vendored
View File

@ -65,3 +65,5 @@ test/sblat3
test/zblat1
test/zblat2
test/zblat3
build
build.*

125
CMakeLists.txt Normal file
View File

@ -0,0 +1,125 @@
##
## Author: Hank Anderson <hank@statease.com>
##

# Object libraries and the $<TARGET_OBJECTS:...> generator expression that this
# file relies on first appeared in CMake 2.8.8, so that is the real minimum.
cmake_minimum_required(VERSION 2.8.8)
project(OpenBLAS)

# Version components, combined into OpenBLAS_VERSION as MAJOR.MINOR.PATCH.
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 2)
set(OpenBLAS_PATCH_VERSION 13)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")

enable_language(ASM)

message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")

# PROJECT_SOURCE_DIR (rather than CMAKE_SOURCE_DIR) keeps these paths correct
# when OpenBLAS is pulled into a larger build via add_subdirectory().
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")

# Directories that make up the BLAS part of the library.
set(BLASDIRS interface driver/level2 driver/level3 driver/others)

# The kernel directory is only added here for non-DYNAMIC_ARCH builds.
if (NOT DYNAMIC_ARCH)
  list(APPEND BLASDIRS kernel)
endif ()

# Merely defining UTEST_CHECK (whatever its value) switches SANITY_CHECK on.
if (DEFINED UTEST_CHECK)
  set(SANITY_CHECK 1)
endif ()

if (DEFINED SANITY_CHECK)
  list(APPEND BLASDIRS reference)
endif ()

# SUBDIRS is BLASDIRS plus, unless NO_LAPACK is set, the lapack directory.
set(SUBDIRS ${BLASDIRS})
if (NOT NO_LAPACK)
  list(APPEND SUBDIRS lapack)
endif ()
# Choose the floating-point precisions to build.  If the caller defined none
# of the BUILD_* switches, every precision is turned on.
if (NOT (DEFINED BUILD_SINGLE OR DEFINED BUILD_DOUBLE OR DEFINED BUILD_COMPLEX OR DEFINED BUILD_COMPLEX16))
  set(BUILD_SINGLE true)
  set(BUILD_DOUBLE true)
  set(BUILD_COMPLEX true)
  set(BUILD_COMPLEX16 true)
endif ()

# FLOAT_TYPES receives one tag per enabled precision, in the fixed order
# SINGLE, DOUBLE, COMPLEX, ZCOMPLEX.
set(FLOAT_TYPES "")

if (BUILD_SINGLE)
  message(STATUS "Building Single Precision")
  set(FLOAT_TYPES ${FLOAT_TYPES} "SINGLE") # defines nothing
endif ()

if (BUILD_DOUBLE)
  message(STATUS "Building Double Precision")
  set(FLOAT_TYPES ${FLOAT_TYPES} "DOUBLE") # defines DOUBLE
endif ()

if (BUILD_COMPLEX)
  message(STATUS "Building Complex Precision")
  set(FLOAT_TYPES ${FLOAT_TYPES} "COMPLEX") # defines COMPLEX
endif ()

if (BUILD_COMPLEX16)
  message(STATUS "Building Double Complex Precision")
  set(FLOAT_TYPES ${FLOAT_TYPES} "ZCOMPLEX") # defines COMPLEX and DOUBLE
endif ()
# SUBDIRS plus the remaining directories listed by the Makefile-based build.
set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench)

# all :: libs netlib tests shared
# libs :

# Stop early when no CPU core was detected (CORE undefined or "UNKNOWN").
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
  message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
endif ()

# Test the variables by name: an undefined variable is then simply false,
# whereas "${NO_STATIC} AND ${NO_SHARED}" would expand to a malformed
# "if( AND )" whenever either variable is unset.
if (NO_STATIC AND NO_SHARED)
  message(FATAL_ERROR "Neither static nor shared are enabled.")
endif ()
# Get the object variables into the format that add_library likes:
# $<TARGET_OBJECTS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
# The object library of each subdirectory is referenced by the subdirectory
# path with "/" replaced by "_" (e.g. driver/level2 -> driver_level2).
set(TARGET_OBJS "")
foreach (SUBDIR ${SUBDIRS})
  add_subdirectory(${SUBDIR})
  string(REPLACE "/" "_" subdir_obj "${SUBDIR}")
  list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${subdir_obj}>")
endforeach ()

# netlib:

# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
# PROJECT_SOURCE_DIR keeps the paths valid when OpenBLAS is built as a subproject.
if (NOT NOFORTRAN)
  include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
endif ()

if (NOT NO_LAPACKE)
  include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
endif ()

# add objects to the openblas lib
add_library(openblas ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS})
# TODO: Why is the config saved here? Is this necessary with CMake?
#Save the config files for installation
# @cp Makefile.conf Makefile.conf_last
# @cp config.h config_last.h
#ifdef QUAD_PRECISION
# @echo "#define QUAD_PRECISION">> config_last.h
#endif
#ifeq ($(EXPRECISION), 1)
# @echo "#define EXPRECISION">> config_last.h
#endif
###
#ifeq ($(DYNAMIC_ARCH), 1)
# @$(MAKE) -C kernel commonlibs || exit 1
# @for d in $(DYNAMIC_CORE) ; \
# do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
# done
# @echo DYNAMIC_ARCH=1 >> Makefile.conf_last
#endif
#ifdef USE_THREAD
# @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
#endif
# @touch lib.grd

115
cmake/arch.cmake Normal file
View File

@ -0,0 +1,115 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from portion of OpenBLAS/Makefile.system
##              Sets various variables based on architecture.

# Compiler checks use CMAKE_C_COMPILER_ID (e.g. "GNU", "Clang", "Intel").
# CMAKE_C_COMPILER holds the path of the compiler executable and therefore
# never equals one of these identifiers.  Expansions are quoted so that an
# empty ARCH or F_COMPILER yields a false comparison instead of a syntax error.
if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64")

  # 32-bit x86 without an explicit BINARY setting: no -m32/-m64 selection.
  if ("${ARCH}" STREQUAL "x86")
    if (NOT BINARY)
      set(NO_BINARY_MODE 1)
    endif ()
  endif ()

  # Extended precision is only enabled together with gfortran.
  if (NOT NO_EXPRECISION)
    if ("${F_COMPILER}" MATCHES "GFORTRAN")
      # N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa
      if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB")
        set(EXPRECISION 1)
        set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
        set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
      endif ()
      # Clang gets the define but not the -m128bit-long-double C flag.
      if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
        set(EXPRECISION 1)
        set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
        set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
      endif ()
    endif ()
  endif ()
endif ()

if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel")
  set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
endif ()
# Add the OpenMP switch of the active C compiler to CCOMMON_OPT.
# The compiler is identified through CMAKE_C_COMPILER_ID; CMAKE_C_COMPILER is
# the path of the executable and never matches these identifiers.
if (USE_OPENMP)

  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB")
    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
  endif ()

  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
    message(WARNING "Clang doesn't support OpenMP yet.")
    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
  endif ()

  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel")
    set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
  endif ()

  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
  endif ()

  # NOTE(review): CMake documents no compiler id for Open64; the literal from
  # the Makefile port is kept, so this branch may never match - confirm.
  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
    set(CEXTRALIB "${CEXTRALIB} -lstdc++")
  endif ()

  # CMake spells the PathScale compiler id "PathScale".
  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PathScale")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
  endif ()
endif ()
# For DYNAMIC_ARCH builds, DYNAMIC_CORE names the CPU cores to build for as a
# single space-separated string.  ARCH is quoted so that an empty value
# compares false instead of producing a malformed if().
if (DYNAMIC_ARCH)
  if ("${ARCH}" STREQUAL "x86")
    set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
  endif ()

  if ("${ARCH}" STREQUAL "x86_64")
    set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
    # AVX / AVX2 cores are appended unless disabled through NO_AVX / NO_AVX2.
    if (NOT NO_AVX)
      set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
    endif ()
    if (NOT NO_AVX2)
      set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL")
    endif ()
  endif ()

  # No core list for this architecture: fall back to a non-dynamic build.
  if (NOT DYNAMIC_CORE)
    unset(DYNAMIC_ARCH)
  endif ()
endif ()
# Remaining architectures: set NO_BINARY_MODE and, except for mips64,
# BINARY_DEFINED.  ARCH is quoted so an empty value compares false instead of
# producing a malformed if().
if ("${ARCH}" STREQUAL "ia64")
  set(NO_BINARY_MODE 1)
  set(BINARY_DEFINED 1)

  # Placeholder kept from the Makefile port; EXPRECISION is not enabled here.
  if ("${F_COMPILER}" MATCHES "GFORTRAN")
    if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
      # EXPRECISION = 1
      # CCOMMON_OPT += -DEXPRECISION
    endif ()
  endif ()
endif ()

if ("${ARCH}" STREQUAL "mips64")
  set(NO_BINARY_MODE 1)
endif ()

if ("${ARCH}" STREQUAL "alpha")
  set(NO_BINARY_MODE 1)
  set(BINARY_DEFINED 1)
endif ()

if ("${ARCH}" STREQUAL "arm")
  set(NO_BINARY_MODE 1)
  set(BINARY_DEFINED 1)
endif ()

if ("${ARCH}" STREQUAL "arm64")
  set(NO_BINARY_MODE 1)
  set(BINARY_DEFINED 1)
endif ()

84
cmake/c_check.cmake Normal file
View File

@ -0,0 +1,84 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from the OpenBLAS/c_check perl script.
##              This is triggered by prebuild.cmake and runs before any of the code is built.
##              Creates config.h and Makefile.conf.

# CMake vars set by this file:
# OSNAME (use CMAKE_SYSTEM_NAME)
# ARCH
# C_COMPILER (use CMAKE_C_COMPILER)
# BINARY32
# BINARY64
# FU
# CROSS_SUFFIX
# CROSS
# CEXTRALIB

# Defines set by this file:
# OS_
# ARCH_
# C_
# __32BIT__
# __64BIT__
# FUNDERSCORE
# PTHREAD_CREATE_FUNC

# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables.

# TODO: detect FU (front underscore) by compiling ctest1.c
set(FU "_")

# Convert CMake vars into the format that OpenBLAS expects.
# Expansions are quoted so an empty value cannot drop an argument.
string(TOUPPER "${CMAKE_SYSTEM_NAME}" HOST_OS)
if ("${HOST_OS}" STREQUAL "WINDOWS")
  set(HOST_OS WINNT)
endif ()

# added by hpa - check size of void ptr to detect 64-bit compile
if (NOT DEFINED BINARY)
  set(BINARY 32)
  if (CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(BINARY 64)
  endif ()
endif ()

if (BINARY EQUAL 64)
  set(BINARY64 1)
else ()
  set(BINARY32 1)
endif ()

# CMake docs define these:
# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for.
# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on.
#
# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check
set(ARCH "${CMAKE_SYSTEM_PROCESSOR}")
if ("${ARCH}" STREQUAL "AMD64")
  set(ARCH "x86_64")
endif ()

# If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong
if ("${ARCH}" STREQUAL "x86_64" AND BINARY EQUAL 32)
  set(ARCH x86)
endif ()

if ("${ARCH}" STREQUAL "X86")
  set(ARCH x86)
endif ()

# The C_<id> define describes the C compiler, so take the id of the C
# compiler rather than that of the C++ compiler.
set(COMPILER_ID "${CMAKE_C_COMPILER_ID}")
if ("${COMPILER_ID}" STREQUAL "GNU")
  set(COMPILER_ID "GCC")
endif ()

string(TOUPPER "${ARCH}" UC_ARCH)

# Start the configuration header; this overwrites any existing TARGET_CONF.
file(WRITE "${TARGET_CONF}"
  "#define OS_${HOST_OS}\t1\n"
  "#define ARCH_${UC_ARCH}\t1\n"
  "#define C_${COMPILER_ID}\t1\n"
  "#define __${BINARY}BIT__\t1\n"
  "#define FUNDERSCORE\t${FU}\n")

103
cmake/cc.cmake Normal file
View File

@ -0,0 +1,103 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from portion of OpenBLAS/Makefile.system
##              Sets C related variables.

# The compiler is identified through CMAKE_C_COMPILER_ID; CMAKE_C_COMPILER is
# the path of the executable and never equals "GNU", "LSB" or "Clang".
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")

  set(CCOMMON_OPT "${CCOMMON_OPT} -Wall")
  set(COMMON_PROF "${COMMON_PROF} -fno-inline")
  set(NO_UNINITIALIZED_WARN "-Wno-uninitialized")

  if (QUIET_MAKE)
    set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused")
  endif ()

  if (NO_BINARY_MODE)

    if ("${ARCH}" STREQUAL "mips64")
      if (BINARY64)
        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64")
      else ()
        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=n32")
      endif ()
      set(BINARY_DEFINED 1)
    endif ()

    # CORE and OSNAME are quoted: either may be empty or undefined here,
    # which would otherwise turn the comparison into a syntax error.
    if ("${CORE}" STREQUAL "LOONGSON3A")
      set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
      set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
    endif ()

    if ("${CORE}" STREQUAL "LOONGSON3B")
      set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
      set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
    endif ()

    if ("${OSNAME}" STREQUAL "AIX")
      set(BINARY_DEFINED 1)
    endif ()
  endif ()

  # Nothing above fixed the word size: choose it with -m64 / -m32.
  if (NOT BINARY_DEFINED)
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
    endif ()
  endif ()
endif ()
# PGI and PathScale C compilers: select 32- or 64-bit code generation.
# CMAKE_C_COMPILER_ID is compared (CMAKE_C_COMPILER is the executable path).
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI")
  if (BINARY64)
    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7")
  endif ()
endif ()

# CMake spells the PathScale compiler id "PathScale".
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PathScale")
  if (BINARY64)
    set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
  endif ()
endif ()
# Open64 C compiler flags.
# NOTE(review): CMake documents no compiler id for Open64; the literal from
# the Makefile port is kept, so this block may never match - confirm.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64")

  if ("${ARCH}" STREQUAL "mips64")

    if (NOT BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -n32")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -n64")
    endif ()

    if ("${CORE}" STREQUAL "LOONGSON3A")
      set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static")
    endif ()

    if ("${CORE}" STREQUAL "LOONGSON3B")
      set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static")
    endif ()

  else ()

    # 64-bit builds get -m64 and 32-bit builds get -m32; the two flags were
    # swapped in the original port.
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
    endif ()
  endif ()
endif ()
# Sun / Oracle Studio C compiler; CMake reports its compiler id as "SunPro".
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "SunPro")
  set(CCOMMON_OPT "${CCOMMON_OPT} -w")
  if ("${ARCH}" STREQUAL "x86")
    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
  else ()
    # NOTE(review): the 64-bit flag is appended to FCOMMON_OPT rather than
    # CCOMMON_OPT; kept as found - confirm whether that is intended.
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  endif ()
endif ()
endif ()

60
cmake/f_check.cmake Normal file
View File

@ -0,0 +1,60 @@
##
## Author: Hank Anderson <hank@statease.com>
## Copyright: (c) Stat-Ease, Inc.
## Created: 12/29/14
## Last Modified: 12/29/14
## Description: Ported from the OpenBLAS/f_check perl script.
##              This is triggered by prebuild.cmake and runs before any of the code is built.
##              Appends Fortran information to config.h and Makefile.conf.

# CMake vars set by this file:
# F_COMPILER
# FC
# BU
# NOFORTRAN
# NEED2UNDERSCORES
# FEXTRALIB

# Defines set by this file:
# BUNDERSCORE
# NEEDBUNDERSCORE
# NEED2UNDERSCORES

if (MSVC)
  # had to do this for MSVC, else CMake automatically assumes I have ifort... -hpa
  include(CMakeForceCompiler)
  CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
endif ()

enable_language(Fortran)

if (NOT ONLY_CBLAS)
  # N.B. f_check is not cross-platform, so instead try to use CMake variables
  # run f_check (appends to TARGET files)
  # message(STATUS "Running f_check...")
  # execute_process(COMMAND perl f_check ${TARGET_MAKE} ${TARGET_CONF} ${CMAKE_Fortran_COMPILER}
  # WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})

  # TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile
  # TODO: set FEXTRALIB flags a la f_check?
  set(BU "_")
  file(APPEND "${TARGET_CONF}"
    "#define BUNDERSCORE _\n"
    "#define NEEDBUNDERSCORE 1\n"
    "#define NEED2UNDERSCORES 0\n")

else ()

  #When we only build CBLAS, we set NOFORTRAN=2
  set(NOFORTRAN 2)
  set(NO_FBLAS 1)
  #set(F_COMPILER GFORTRAN) # CMake handles the fortran compiler
  set(BU "_")
  file(APPEND "${TARGET_CONF}"
    "#define BUNDERSCORE _\n"
    "#define NEEDBUNDERSCORE 1\n")
endif()

# F_COMPILER becomes the upper-cased base name of the Fortran compiler
# executable (e.g. gfortran -> GFORTRAN).  The path is quoted: an install
# location containing spaces, or an empty value, would otherwise be split
# into the wrong number of arguments.
get_filename_component(F_COMPILER "${CMAKE_Fortran_COMPILER}" NAME_WE)
string(TOUPPER "${F_COMPILER}" F_COMPILER)

200
cmake/fc.cmake Normal file
View File

@ -0,0 +1,200 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from portion of OpenBLAS/Makefile.system
##              Sets Fortran related variables.

# F_COMPILER is quoted in every comparison so that an empty value compares
# false instead of producing a malformed if().
if ("${F_COMPILER}" STREQUAL "G77")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
  if (NOT NO_BINARY_MODE)
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()
endif ()

if ("${F_COMPILER}" STREQUAL "G95")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
  if (NOT NO_BINARY_MODE)
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()
endif ()
# gfortran: interface define, warnings, runtime library and word-size flags.
if ("${F_COMPILER}" STREQUAL "GFORTRAN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")

  #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
  if (NOT NO_LAPACK)
    # Append to the existing EXTRALIB.  The original wrote "{EXTRALIB}"
    # without the "$", which replaced the value with that literal text.
    set(EXTRALIB "${EXTRALIB} -lgfortran")
  endif ()

  if (NO_BINARY_MODE)
    if ("${ARCH}" STREQUAL "mips64")
      if (BINARY64)
        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
      else ()
        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
      endif ()
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
      # The 8-byte default integer flag is only added for 64-bit builds.
      if (INTERFACE64)
        set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
      endif ()
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
  endif ()
endif ()
# Intel, Fujitsu, IBM, PGI and PathScale Fortran compilers.
# F_COMPILER and ARCH are quoted so that an empty value compares false
# instead of producing a malformed if().
if ("${F_COMPILER}" STREQUAL "INTEL")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL")
  if (INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()

if ("${F_COMPILER}" STREQUAL "FUJITSU")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU")
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()

if ("${F_COMPILER}" STREQUAL "IBM")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM")
  # FCOMMON_OPT += -qarch=440
  if (BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -q64")
    if (INTERFACE64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -qintsize=8")
    endif ()
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -q32")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()

if ("${F_COMPILER}" STREQUAL "PGI")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI")
  set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER")
  if (BINARY64)
    if (INTERFACE64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
    endif ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7-64")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()

if ("${F_COMPILER}" STREQUAL "PATHSCALE")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE")
  if (BINARY64)
    if (INTERFACE64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
    endif ()
  endif ()

  # mips64 selects the ABI with -mabi=; every other architecture uses -m32/-m64.
  if (NOT "${ARCH}" STREQUAL "mips64")
    if (NOT BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
# Open64, Sun and Compaq Fortran compilers, followed by the TIMER selection.
# F_COMPILER, ARCH and CORE are quoted so that an empty value compares false
# instead of producing a malformed if().
if ("${F_COMPILER}" STREQUAL "OPEN64")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64")
  if (BINARY64)
    if (INTERFACE64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
    endif ()
  endif ()

  if ("${ARCH}" STREQUAL "mips64")
    if (NOT BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -n32")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -n64")
    endif ()
    if ("${CORE}" STREQUAL "LOONGSON3A")
      set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static")
    endif ()
    if ("${CORE}" STREQUAL "LOONGSON3B")
      set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static")
    endif ()
  else ()
    if (NOT BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FEXTRALIB "${FEXTRALIB} -lstdc++")
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()

if ("${F_COMPILER}" STREQUAL "SUN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN")
  if ("${ARCH}" STREQUAL "x86")
    set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -xopenmp=parallel")
  endif ()
endif ()

if ("${F_COMPILER}" STREQUAL "COMPAQ")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ")
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()

# from the root Makefile - this is for lapack-netlib to compile the correct secnd file.
if ("${F_COMPILER}" STREQUAL "GFORTRAN")
  set(TIMER "INT_ETIME")
else ()
  set(TIMER "NONE")
endif ()

158
cmake/kernel.cmake Normal file
View File

@ -0,0 +1,158 @@
# helper functions for the kernel CMakeLists.txt
# SetDefaultL1()
# Assigns the default source file of every level-1 kernel variable.  Being a
# macro, it sets the variables directly in the caller's scope.  Most of these
# defaults will be overridden by the appropriate KERNEL file.
macro(SetDefaultL1)
  # amax / amin
  set(SAMAXKERNEL amax.S)
  set(DAMAXKERNEL amax.S)
  set(QAMAXKERNEL amax.S)
  set(CAMAXKERNEL zamax.S)
  set(ZAMAXKERNEL zamax.S)
  set(XAMAXKERNEL zamax.S)
  set(SAMINKERNEL amin.S)
  set(DAMINKERNEL amin.S)
  set(QAMINKERNEL amin.S)
  set(CAMINKERNEL zamin.S)
  set(ZAMINKERNEL zamin.S)
  set(XAMINKERNEL zamin.S)

  # max / min
  set(SMAXKERNEL max.S)
  set(DMAXKERNEL max.S)
  set(QMAXKERNEL max.S)
  set(SMINKERNEL min.S)
  set(DMINKERNEL min.S)
  set(QMINKERNEL min.S)

  # iamax / iamin
  set(ISAMAXKERNEL iamax.S)
  set(IDAMAXKERNEL iamax.S)
  set(IQAMAXKERNEL iamax.S)
  set(ICAMAXKERNEL izamax.S)
  set(IZAMAXKERNEL izamax.S)
  set(IXAMAXKERNEL izamax.S)
  set(ISAMINKERNEL iamin.S)
  set(IDAMINKERNEL iamin.S)
  set(IQAMINKERNEL iamin.S)
  set(ICAMINKERNEL izamin.S)
  set(IZAMINKERNEL izamin.S)
  set(IXAMINKERNEL izamin.S)

  # imax / imin (default to the same files as iamax / iamin)
  set(ISMAXKERNEL iamax.S)
  set(IDMAXKERNEL iamax.S)
  set(IQMAXKERNEL iamax.S)
  set(ISMINKERNEL iamin.S)
  set(IDMINKERNEL iamin.S)
  set(IQMINKERNEL iamin.S)

  # asum
  set(SASUMKERNEL asum.S)
  set(DASUMKERNEL asum.S)
  set(CASUMKERNEL zasum.S)
  set(ZASUMKERNEL zasum.S)
  set(QASUMKERNEL asum.S)
  set(XASUMKERNEL zasum.S)

  # axpy
  set(SAXPYKERNEL axpy.S)
  set(DAXPYKERNEL axpy.S)
  set(CAXPYKERNEL zaxpy.S)
  set(ZAXPYKERNEL zaxpy.S)
  set(QAXPYKERNEL axpy.S)
  set(XAXPYKERNEL zaxpy.S)

  # copy
  set(SCOPYKERNEL copy.S)
  set(DCOPYKERNEL copy.S)
  set(CCOPYKERNEL zcopy.S)
  set(ZCOPYKERNEL zcopy.S)
  set(QCOPYKERNEL copy.S)
  set(XCOPYKERNEL zcopy.S)

  # dot
  set(SDOTKERNEL dot.S)
  set(DDOTKERNEL dot.S)
  set(CDOTKERNEL zdot.S)
  set(ZDOTKERNEL zdot.S)
  set(QDOTKERNEL dot.S)
  set(XDOTKERNEL zdot.S)

  # nrm2
  set(SNRM2KERNEL nrm2.S)
  set(DNRM2KERNEL nrm2.S)
  set(QNRM2KERNEL nrm2.S)
  set(CNRM2KERNEL znrm2.S)
  set(ZNRM2KERNEL znrm2.S)
  set(XNRM2KERNEL znrm2.S)

  # rot
  set(SROTKERNEL rot.S)
  set(DROTKERNEL rot.S)
  set(QROTKERNEL rot.S)
  set(CROTKERNEL zrot.S)
  set(ZROTKERNEL zrot.S)
  set(XROTKERNEL zrot.S)

  # scal
  set(SSCALKERNEL scal.S)
  set(DSCALKERNEL scal.S)
  set(CSCALKERNEL zscal.S)
  set(ZSCALKERNEL zscal.S)
  set(QSCALKERNEL scal.S)
  set(XSCALKERNEL zscal.S)

  # swap
  set(SSWAPKERNEL swap.S)
  set(DSWAPKERNEL swap.S)
  set(CSWAPKERNEL zswap.S)
  set(ZSWAPKERNEL zswap.S)
  set(QSWAPKERNEL swap.S)
  set(XSWAPKERNEL zswap.S)

  # gemv (n and t variants); SetDefaultL2 assigns the same defaults
  set(SGEMVNKERNEL gemv_n.S)
  set(SGEMVTKERNEL gemv_t.S)
  set(DGEMVNKERNEL gemv_n.S)
  set(DGEMVTKERNEL gemv_t.S)
  set(CGEMVNKERNEL zgemv_n.S)
  set(CGEMVTKERNEL zgemv_t.S)
  set(ZGEMVNKERNEL zgemv_n.S)
  set(ZGEMVTKERNEL zgemv_t.S)
  set(QGEMVNKERNEL gemv_n.S)
  set(QGEMVTKERNEL gemv_t.S)
  set(XGEMVNKERNEL zgemv_n.S)
  set(XGEMVTKERNEL zgemv_t.S)

  # cabs and lsame
  set(SCABS_KERNEL cabs.S)
  set(DCABS_KERNEL cabs.S)
  set(QCABS_KERNEL cabs.S)
  set(LSAME_KERNEL lsame.S)

  # axpby defaults to the C sources under ../arm
  set(SAXPBYKERNEL ../arm/axpby.c)
  set(DAXPBYKERNEL ../arm/axpby.c)
  set(CAXPBYKERNEL ../arm/zaxpby.c)
  set(ZAXPBYKERNEL ../arm/zaxpby.c)
endmacro ()
# SetDefaultL2()
# Assigns the default source file of every level-2 kernel variable (gemv,
# ger, symv, hemv).  Being a macro, it sets the variables directly in the
# caller's scope.
macro(SetDefaultL2)
  # gemv (n and t variants)
  set(SGEMVNKERNEL gemv_n.S)
  set(SGEMVTKERNEL gemv_t.S)
  set(DGEMVNKERNEL gemv_n.S)
  set(DGEMVTKERNEL gemv_t.S)
  set(CGEMVNKERNEL zgemv_n.S)
  set(CGEMVTKERNEL zgemv_t.S)
  set(ZGEMVNKERNEL zgemv_n.S)
  set(ZGEMVTKERNEL zgemv_t.S)
  set(QGEMVNKERNEL gemv_n.S)
  set(QGEMVTKERNEL gemv_t.S)
  set(XGEMVNKERNEL zgemv_n.S)
  set(XGEMVTKERNEL zgemv_t.S)

  # ger: generic C sources
  set(SGERKERNEL ../generic/ger.c)
  set(DGERKERNEL ../generic/ger.c)
  set(QGERKERNEL ../generic/ger.c)
  set(CGERUKERNEL ../generic/zger.c)
  set(CGERCKERNEL ../generic/zger.c)
  set(ZGERUKERNEL ../generic/zger.c)
  set(ZGERCKERNEL ../generic/zger.c)
  set(XGERUKERNEL ../generic/zger.c)
  set(XGERCKERNEL ../generic/zger.c)

  # symv: U and L variants share one generic source
  set(SSYMV_U_KERNEL ../generic/symv_k.c)
  set(SSYMV_L_KERNEL ../generic/symv_k.c)
  set(DSYMV_U_KERNEL ../generic/symv_k.c)
  set(DSYMV_L_KERNEL ../generic/symv_k.c)
  set(QSYMV_U_KERNEL ../generic/symv_k.c)
  set(QSYMV_L_KERNEL ../generic/symv_k.c)
  set(CSYMV_U_KERNEL ../generic/zsymv_k.c)
  set(CSYMV_L_KERNEL ../generic/zsymv_k.c)
  set(ZSYMV_U_KERNEL ../generic/zsymv_k.c)
  set(ZSYMV_L_KERNEL ../generic/zsymv_k.c)
  set(XSYMV_U_KERNEL ../generic/zsymv_k.c)
  set(XSYMV_L_KERNEL ../generic/zsymv_k.c)

  # hemv: U, L, V and M variants share one generic source
  set(CHEMV_U_KERNEL ../generic/zhemv_k.c)
  set(CHEMV_L_KERNEL ../generic/zhemv_k.c)
  set(CHEMV_V_KERNEL ../generic/zhemv_k.c)
  set(CHEMV_M_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_U_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_L_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_V_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_M_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_U_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
endmacro ()

347
cmake/lapack.cmake Normal file
View File

@ -0,0 +1,347 @@
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.

# ALLAUX: Fortran source file names; the last two entries are given relative
# to ../INSTALL.
# NOTE(review): presumably the auxiliary routines shared by all precisions,
# mirroring the variable of the same name in lapack-netlib's own build files - confirm.
set(ALLAUX
ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f
../INSTALL/ilaver.f ../INSTALL/slamch.f
)
# SCLAUX: Fortran source file names with an "s" prefix.
# NOTE(review): presumably the auxiliary routines shared by the single and
# complex builds - confirm against lapack-netlib.
# The last entry expands ${TIMER} into the file name, so TIMER must already be
# set when this list is evaluated (it is not set in this file).
set(SCLAUX
sbdsdc.f
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
slagts.f slamrg.f slanst.f
slapy2.f slapy3.f slarnv.f
slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
slarrk.f slarrr.f slaneg.f
slartg.f slaruv.f slas2.f slascl.f
slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
ssteqr.f ssterf.f slaisnan.f sisnan.f
slartgp.f slartgs.f
../INSTALL/second_${TIMER}.f
)
# DZLAUX: Fortran source file names with a "d" prefix; the counterpart of SCLAUX.
# NOTE(review): presumably the auxiliary routines shared by the double and
# double-complex builds - confirm against lapack-netlib.
# The last entry expands ${TIMER} into the file name, so TIMER must already be
# set when this list is evaluated (it is not set in this file).
set(DZLAUX
dbdsdc.f
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
dlagts.f dlamrg.f dlanst.f
dlapy2.f dlapy3.f dlarnv.f
dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
dlarrk.f dlarrr.f dlaneg.f
dlartg.f dlaruv.f dlas2.f dlascl.f
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
dsteqr.f dsterf.f dlaisnan.f disnan.f
dlartgp.f dlartgs.f
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
)
# SLASRC: Fortran source file names, almost all with an "s" prefix
# (ilaslr.f and ilaslc.f are the exceptions).
# NOTE(review): presumably the single-precision real LAPACK routines - confirm
# against lapack-netlib.
set(SLASRC
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f
sgetc2.f sgetri.f
sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f
sggglm.f sgghrd.f sgglse.f sggqrf.f
sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
slansy.f slantb.f slantp.f slantr.f slanv2.f
slapll.f slapmt.f
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
slarrv.f slartv.f
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
sorgrq.f sorgtr.f sorm2l.f sorm2r.f
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
spbstf.f spbsv.f spbsvx.f
spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
sposvx.f spstrf.f spstf2.f
sppcon.f sppequ.f
spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
sstevx.f
ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f
ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
ssytri_rook.f ssycon_rook.f ssysv_rook.f
stbcon.f
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
stptrs.f
strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
strtrs.f stzrqf.f stzrzf.f sstemr.f
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
sgeequb.f ssyequb.f spoequb.f sgbequb.f
sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
)
# DSLASRC: a single "s"-prefixed source kept in its own list.
# NOTE(review): presumably the single-precision routine also needed by the
# double-precision build - confirm.
set(DSLASRC spotrs.f)
# CLASRC: Fortran source file names, almost all with a "c" prefix
# (ilaclr.f, ilaclc.f, icmax1.f and scsum1.f are the exceptions).
# NOTE(review): presumably the single-precision complex LAPACK routines -
# confirm against lapack-netlib.
set(CLASRC
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
cgesvx.f cgetc2.f cgetri.f
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
cgghrd.f cgglse.f cggqrf.f cggrqf.f
cggsvd.f cggsvp.f
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
chetf2.f chetrd.f
chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
chetrs.f chetrs2.f
chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f
chgeqz.f chpcon.f chpev.f chpevd.f
chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
chpsvx.f
chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
claed0.f claed7.f claed8.f
claein.f claesy.f claev2.f clags2.f clagtm.f
clahef.f clahef_rook.f clahqr.f
clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
clanhb.f clanhe.f
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
clarf.f clarfb.f clarfg.f clarft.f clarfgp.f
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
cposv.f cposvx.f cpstrf.f cpstf2.f
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
crot.f cspcon.f csprfs.f cspsv.f
cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
cstegr.f cstein.f csteqr.f
csycon.f
csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f
csyswapr.f csytrs.f csytrs2.f csyconv.f
csytf2_rook.f csytrf_rook.f csytrs_rook.f
csytri_rook.f csycon_rook.f csysv_rook.f
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
ctprfs.f ctptri.f
ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
)
# ZCLASRC: a single "c"-prefixed source kept in its own list.
# NOTE(review): presumably the complex routine also needed by the
# double-complex build - confirm.
set(ZCLASRC cpotrs.f)
# DLASRC: Fortran source file names, almost all with a "d" prefix
# (iladlr.f, iladlc.f and slag2d.f are the exceptions).
# NOTE(review): presumably the double-precision real LAPACK routines - confirm
# against lapack-netlib.
set(DLASRC
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f
dgetc2.f dgetri.f
dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f
dggglm.f dgghrd.f dgglse.f dggqrf.f
dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
dlapll.f dlapmt.f
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
dlargv.f dlarrv.f dlartv.f
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
dorgrq.f dorgtr.f dorm2l.f dorm2r.f
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
dpbstf.f dpbsv.f dpbsvx.f
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
dposvx.f dpotrs.f dpstrf.f dpstf2.f
dppcon.f dppequ.f
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
dstevx.f
dsycon.f dsyev.f dsyevd.f dsyevr.f
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
dsysv.f dsysvx.f
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f
dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f
dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
dsytri_rook.f dsycon_rook.f dsysv_rook.f
dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
dtptrs.f
dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f
dgeequb.f dsyequb.f dpoequb.f dgbequb.f
dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
)
# LAPACK sources that are appended to LA_REL_SRC when BUILD_COMPLEX16 is enabled.
# Entries are bare file names; the ${NETLIB_LAPACK_DIR}/SRC/ prefix is added when LA_SOURCES is built.
set(ZLASRC
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f
zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f
zgetri.f
zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f
zgghrd.f zgglse.f zggqrf.f zggrqf.f
zggsvd.f zggsvp.f
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f
zhetf2.f zhetrd.f
zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
zhetrs.f zhetrs2.f
zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f
zhgeqz.f zhpcon.f zhpev.f zhpevd.f
zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
zhpsvx.f
zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
zlaed0.f zlaed7.f zlaed8.f
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
zlahef.f zlahef_rook.f zlahqr.f
zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
zlangt.f zlanhb.f
zlanhe.f
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
zlarcm.f zlarf.f zlarfb.f
zlarfg.f zlarft.f zlarfgp.f
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
zlassq.f zlasyf.f zlasyf_rook.f
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
zrot.f zspcon.f zsprfs.f zspsv.f
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
zstegr.f zstein.f zsteqr.f
zsycon.f
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
zsytri_rook.f zsycon_rook.f zsysv_rook.f
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
ztprfs.f ztptri.f
ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
zunmtr.f zupgtr.f
zupmtr.f izmax1.f dzsum1.f zstemr.f
zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
)
# Collect the LAPACK sources for every enabled float type.
set(LA_REL_SRC ${ALLAUX})
if (BUILD_SINGLE)
  list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX})
endif ()
if (BUILD_DOUBLE)
  list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX})
endif ()
if (BUILD_COMPLEX)
  list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX})
endif ()
if (BUILD_COMPLEX16)
  list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX})
endif ()

# DSLASRC, ZCLASRC, SCLAUX and DZLAUX are each appended by two of the branches
# above, so they show up twice when both precisions are enabled. Keep only the
# first occurrence of every file.
if (LA_REL_SRC)
  list(REMOVE_DUPLICATES LA_REL_SRC)
endif ()

# add lapack-netlib folder to the sources
set(LA_SOURCES "")
foreach (LA_FILE ${LA_REL_SRC})
  list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}")
endforeach ()

# every LAPACK source is compiled with the LAPACK-specific Fortran flags
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")

104
cmake/os.cmake Normal file
View File

@ -0,0 +1,104 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from portion of OpenBLAS/Makefile.system
## Detects the OS and sets appropriate variables.
# Checksum tool and math-library settings for the unix-like systems.
# CMAKE_SYSTEM_NAME holds exactly one value, so the cases are mutually exclusive.
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
  set(MD5SUM "md5 -r")
  set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var
elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD")
  set(MD5SUM "md5 -r")
elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "NetBSD")
  set(MD5SUM "md5 -n")
elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
  set(NO_EXPRECISION 1)
  set(EXTRALIB "${EXTRALIB} -lm")
elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "AIX")
  set(EXTRALIB "${EXTRALIB} -lm")
endif ()
# Windows settings.
# TODO: this is probably meant for mingw, not other windows compilers
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(NEED_PIC 0)
  set(NO_EXPRECISION 1)
  set(EXTRALIB "${EXTRALIB} -defaultlib:advapi32")

  # probably not going to use these
  set(SUFFIX "obj")
  set(PSUFFIX "pobj")
  set(LIBSUFFIX "a")

  # Compiler IDs are quoted so that an empty ID is a plain "no match" instead of an if() syntax error.
  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
    set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
  elseif ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
    # Test for supporting MS_ABI
    # removed string parsing in favor of CMake's version comparison -hpa
    # -dumpversion ends its output with a newline; strip it before comparing.
    execute_process(COMMAND "${CMAKE_C_COMPILER}" -dumpversion
      OUTPUT_VARIABLE GCC_VERSION
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    if (NOT "${GCC_VERSION}" VERSION_LESS "4.7")
      # GCC Version >=4.7
      # It is compatible with MSVC ABI.
      set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
    endif ()
  endif ()

  # Ensure the correct stack alignment on Win32
  # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
  if ("${ARCH}" STREQUAL "x86")
    if (NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
      set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2")
    endif ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2")
  endif ()
endif ()
# Interix settings.
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Interix")
  set(NEED_PIC 0)
  set(NO_EXPRECISION 1)
  # Plain path value. A stray STREQUAL keyword here used to turn the variable
  # into the two-element list "STREQUAL;/opt/...".
  set(INTERIX_TOOL_DIR "/opt/gcc.3.3/i586-pc-interix3/bin")
endif ()
# Cygwin: no PIC, no extended precision.
if (CYGWIN)
  set(NEED_PIC 0)
  set(NO_EXPRECISION 1)
endif ()

# Threaded builds link pthread everywhere except on Windows and Interix.
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" AND NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Interix")
  if (SMP)
    set(EXTRALIB "${EXTRALIB} -lpthread")
  endif ()
endif ()

if (QUAD_PRECISION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DQUAD_PRECISION")
  set(NO_EXPRECISION 1)
endif ()

# ARCH is quoted so that an unset ARCH compares as the empty string instead of
# turning the if() into a configure-time syntax error.
if ("${ARCH}" STREQUAL "x86")
  set(NO_EXPRECISION 1)
endif ()

# UTEST_CHECK implies SANITY_CHECK.
if (UTEST_CHECK)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
  set(SANITY_CHECK 1)
endif ()

if (SANITY_CHECK)
  # TODO: need some way to get $(*F) (target filename); until then the
  # Makefile-style $(*F) text is passed along unexpanded.
  set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK -DREFNAME=$(*F)f${BU}")
endif ()

108
cmake/prebuild.cmake Normal file
View File

@ -0,0 +1,108 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from OpenBLAS/Makefile.prebuild
## This is triggered by system.cmake and runs before any of the code is built.
## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files).
## Next it runs f_check and appends some fortran information to the files.
## Finally it runs getarch and getarch_2nd for even more environment information.
# CMake vars set by this file:
# CORE
# LIBCORE
# NUM_CORES
# HAVE_MMX
# HAVE_SSE
# HAVE_SSE2
# HAVE_SSE3
# MAKE
# SGEMM_UNROLL_M
# SGEMM_UNROLL_N
# DGEMM_UNROLL_M
# DGEMM_UNROLL_N
# QGEMM_UNROLL_M
# QGEMM_UNROLL_N
# CGEMM_UNROLL_M
# CGEMM_UNROLL_N
# ZGEMM_UNROLL_M
# ZGEMM_UNROLL_N
# XGEMM_UNROLL_M
# XGEMM_UNROLL_N
# CGEMM3M_UNROLL_M
# CGEMM3M_UNROLL_N
# ZGEMM3M_UNROLL_M
# ZGEMM3M_UNROLL_N
# XGEMM3M_UNROLL_M
# XGEMM3M_UNROLL_N
# CPUIDEMU = ../../cpuid/table.o
# Extra defines for getarch / getarch_2nd when CPUIDEMU is provided.
if (DEFINED CPUIDEMU)
  set(EXFLAGS "-DCPUIDEMU -DVENDOR=99")
endif ()

# Names of the generated configuration files. The regular names are the
# default; a TARGET_CORE build switches to the *_kernel variants and adds the
# BUILD_KERNEL define that is passed to the getarch_2nd compilation.
set(TARGET_MAKE "Makefile.conf")
set(TARGET_CONF "config.h")
if (DEFINED TARGET_CORE)
  # set the C flags for just this file
  set(GETARCH2_FLAGS "-DBUILD_KERNEL")
  set(TARGET_MAKE "Makefile_kernel.conf")
  set(TARGET_CONF "config_kernel.h")
endif ()

# The C compiler check always runs; the Fortran check is skipped for NOFORTRAN builds.
include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake")
if (NOT NOFORTRAN)
  include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake")
endif ()
# compile getarch
# The optional CPUID emulation source comes from CPUIDEMU, the same variable
# that switches on -DCPUIDEMU above (the list used to reference the misspelled,
# never-defined CPUIDEMO).
set(GETARCH_SRC
  ${CMAKE_SOURCE_DIR}/getarch.c
  ${CPUIDEMU}
)
if (NOT MSVC)
  list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S)
endif ()

set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
# Absolute location of the copied executable. A bare file name only runs when
# the current directory happens to be searched for executables.
set(GETARCH_BIN_PATH "${PROJECT_BINARY_DIR}/${GETARCH_BIN}")
file(MAKE_DIRECTORY ${GETARCH_DIR})
try_compile(GETARCH_RESULT ${GETARCH_DIR}
  SOURCES ${GETARCH_SRC}
  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GETARCH_LOG
  COPY_FILE "${GETARCH_BIN_PATH}"
)
# Everything below depends on getarch's output, so stop with the compiler log
# instead of continuing with empty results.
if (NOT GETARCH_RESULT)
  message(FATAL_ERROR "Failed to compile getarch:\n${GETARCH_LOG}")
endif ()

message(STATUS "Running getarch")
# argument 0 prints Makefile-style VAR=VALUE lines, argument 1 prints the text appended to the config header
execute_process(COMMAND "${GETARCH_BIN_PATH}" 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
execute_process(COMMAND "${GETARCH_BIN_PATH}" 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)
#message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")

# append config data from getarch to the TARGET file and read in CMake vars
# (quoted so that the output stays a single argument even when empty or containing ';')
file(APPEND ${TARGET_CONF} "${GETARCH_CONF_OUT}")
ParseGetArchVars("${GETARCH_MAKE_OUT}")
# compile getarch_2nd
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
# Absolute location of the copied executable. A bare file name only runs when
# the current directory happens to be searched for executables.
set(GETARCH2_BIN_PATH "${PROJECT_BINARY_DIR}/${GETARCH2_BIN}")
file(MAKE_DIRECTORY ${GETARCH2_DIR})
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
  SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GETARCH2_LOG
  COPY_FILE "${GETARCH2_BIN_PATH}"
)
# Stop with the compiler log rather than continuing with empty results.
if (NOT GETARCH2_RESULT)
  message(FATAL_ERROR "Failed to compile getarch_2nd:\n${GETARCH2_LOG}")
endif ()

# argument 0 prints Makefile-style VAR=VALUE lines, argument 1 prints the text appended to the config header
execute_process(COMMAND "${GETARCH2_BIN_PATH}" 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
execute_process(COMMAND "${GETARCH2_BIN_PATH}" 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)

# append config data from getarch_2nd to the TARGET file and read in CMake vars
# (quoted so that the output stays a single argument even when empty or containing ';')
file(APPEND ${TARGET_CONF} "${GETARCH2_CONF_OUT}")
ParseGetArchVars("${GETARCH2_MAKE_OUT}")

527
cmake/system.cmake Normal file
View File

@ -0,0 +1,527 @@
##
## Author: Hank Anderson <hank@statease.com>
## Description: Ported from OpenBLAS/Makefile.system
##
# Location of the bundled reference LAPACK.
set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib")

# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa
# http://stackoverflow.com/questions/714100/os-detecting-makefile
# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa

# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
if (DEFINED TARGET_CORE)
  set(TARGET ${TARGET_CORE})
endif ()

# Force fallbacks for 32bit: AVX is disabled and the AVX-era targets are
# replaced by an older core of the same vendor family.
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
  message(STATUS "Compiling a ${BINARY}-bit binary.")
  set(NO_AVX 1)
  if (${TARGET} MATCHES "^(HASWELL|SANDYBRIDGE)$")
    set(TARGET "NEHALEM")
  elseif (${TARGET} MATCHES "^(BULLDOZER|PILEDRIVER)$")
    set(TARGET "BARCELONA")
  endif ()
endif ()

# An explicit target is forced onto getarch through a FORCE_<target> define.
if (DEFINED TARGET)
  message(STATUS "Targetting the ${TARGET} architecture.")
  set(GETARCH_FLAGS "-DFORCE_${TARGET}")
endif ()
# Gather the remaining getarch options in one scratch string and append it to
# GETARCH_FLAGS in a single step; the resulting flag order is unchanged.
set(getarch_extra_flags "")

if (INTERFACE64)
  message(STATUS "Using 64-bit integers.")
  set(getarch_extra_flags "${getarch_extra_flags} -DUSE64BITINT")
endif ()

if (NOT DEFINED GEMM_MULTITHREAD_THRESHOLD)
  set(GEMM_MULTITHREAD_THRESHOLD 4)
endif ()
message(STATUS "GEMM multithread threshold set to ${GEMM_MULTITHREAD_THRESHOLD}.")
set(getarch_extra_flags "${getarch_extra_flags} -DGEMM_MULTITHREAD_THRESHOLD=${GEMM_MULTITHREAD_THRESHOLD}")

if (NO_AVX)
  message(STATUS "Disabling Advanced Vector Extensions (AVX).")
  set(getarch_extra_flags "${getarch_extra_flags} -DNO_AVX")
endif ()

if (NO_AVX2)
  message(STATUS "Disabling Advanced Vector Extensions 2 (AVX2).")
  set(getarch_extra_flags "${getarch_extra_flags} -DNO_AVX2")
endif ()

if (CMAKE_BUILD_TYPE STREQUAL Debug)
  set(getarch_extra_flags "${getarch_extra_flags} -g")
endif ()

# TODO: let CMake handle this? -hpa
#if (${QUIET_MAKE})
#  set(MAKE "${MAKE} -s")
#endif()

if (NOT DEFINED NO_PARALLEL_MAKE)
  set(NO_PARALLEL_MAKE 0)
endif ()
set(getarch_extra_flags "${getarch_extra_flags} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}")

# NOTE(review): this compares the C++ compiler path with "loongcc" - confirm
# that the C++ compiler (rather than the C compiler) is the intended subject.
if (CMAKE_CXX_COMPILER STREQUAL loongcc)
  set(getarch_extra_flags "${getarch_extra_flags} -static")
endif ()

set(GETARCH_FLAGS "${GETARCH_FLAGS}${getarch_extra_flags}")
unset(getarch_extra_flags)

#if don't use Fortran, it will only compile CBLAS.
if (NOT ONLY_CBLAS)
  set(ONLY_CBLAS 0)
else ()
  set(NO_LAPACK 1)
endif ()
# Run the pre-build checks; NUM_CORES used below is among the variables listed in prebuild.cmake's header.
include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake")

# Default the thread count to the detected core count.
if (NOT DEFINED NUM_THREADS)
  set(NUM_THREADS ${NUM_CORES})
endif ()

# NUM_THREADS is quoted so that an empty value (NUM_CORES not detected)
# compares as "not 1" instead of making the if() a syntax error.
if ("${NUM_THREADS}" EQUAL 1)
  set(USE_THREAD 0)
endif ()

# An explicit USE_THREAD decides whether SMP is on; otherwise fall back to the thread count.
if (DEFINED USE_THREAD)
  if (USE_THREAD)
    set(SMP 1)
  else ()
    unset(SMP)
  endif ()
elseif ("${NUM_THREADS}" EQUAL 1)
  # N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
  unset(SMP)
else ()
  set(SMP 1)
endif ()

if (SMP)
  message(STATUS "SMP enabled.")
endif ()

if (NOT DEFINED NEED_PIC)
  set(NEED_PIC 1)
endif ()
# TODO: I think CMake should be handling all this stuff -hpa
unset(ARFLAGS)
set(CPP "${COMPILER} -E")

# Each of these variables is CROSS_SUFFIX followed by the lower-case tool name.
foreach (sys_tool_var AR AS LD RANLIB NM DLLWRAP OBJCOPY OBJCONV)
  string(TOLOWER "${sys_tool_var}" sys_tool_exe)
  set(${sys_tool_var} "${CROSS_SUFFIX}${sys_tool_exe}")
endforeach ()
unset(sys_tool_exe)

# Settings that depend on, in this order: the OS, the architecture, the C compiler.
include("${CMAKE_SOURCE_DIR}/cmake/os.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake")

# Fortran compiler settings are only needed when Fortran is in use.
if (NOT NOFORTRAN)
  include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake")
endif ()
# Makefile.system adds -DUSE64BITINT to CCOMMON_OPT for BINARY64 + INTERFACE64 here; not ported:
# CCOMMON_OPT += -DUSE64BITINT

# Position independent code.
if (NEED_PIC)
  # The IBM XL compiler is recognised through its CMake compiler ID ("XL").
  # The previous test compared the compiler *path* with "IBM", which never matches.
  # NOTE(review): if c_check.cmake exports its own C_COMPILER classification, that may be the intended variable - confirm.
  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "XL")
    set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC")
  endif ()

  # F_COMPILER is quoted: it may be unset, and an unquoted empty expansion is an if() syntax error.
  if ("${F_COMPILER}" STREQUAL "SUN")
    set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
  endif ()
endif ()

if (DYNAMIC_ARCH)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH")
endif ()

if (NO_LAPACK)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACK")
  #Disable LAPACK C interface
  set(NO_LAPACKE 1)
endif ()

if (NO_LAPACKE)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACKE")
endif ()

if (NO_AVX)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
endif ()

# x86 builds always get -DNO_AVX (in addition to the NO_AVX option above).
if ("${ARCH}" STREQUAL "x86")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX")
endif ()

if (NO_AVX2)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
endif ()
# Defines for threaded (SMP) builds.
if (SMP)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER")

  # mips64 cores other than LOONGSON3B use the simple threaded level-3 driver.
  # ARCH and CORE are quoted so that an unset value is not an if() syntax error.
  if ("${ARCH}" STREQUAL "mips64" AND NOT "${CORE}" STREQUAL "LOONGSON3B")
    set(USE_SIMPLE_THREADED_LEVEL3 1)
  endif ()

  if (USE_OPENMP)
    # USE_SIMPLE_THREADED_LEVEL3 = 1
    # NO_AFFINITY = 1
    set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_OPENMP")
  endif ()

  if (BIGNUMA)
    set(CCOMMON_OPT "${CCOMMON_OPT} -DBIGNUMA")
  endif ()
endif ()

if (NO_WARMUP)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_WARMUP")
endif ()

if (CONSISTENT_FPCSR)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DCONSISTENT_FPCSR")
endif ()

# Only for development
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST")
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST")
# set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_SWITCHING")
# set(USE_PAPI 1)

if (USE_PAPI)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_PAPI")
  set(EXTRALIB "${EXTRALIB} -lpapi -lperfctr")
endif ()

if (DYNAMIC_THREADS)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_THREADS")
endif ()

# MAX_CPU_NUMBER is defined as the configured thread count.
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")

if (USE_SIMPLE_THREADED_LEVEL3)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
endif ()
# Library file-name prefix, optionally extended by LIBNAMESUFFIX.
if (DEFINED LIBNAMESUFFIX)
  set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}")
else ()
  set(LIBPREFIX "libopenblas")
endif ()

# Symbol prefix/suffix default to empty.
if (NOT DEFINED SYMBOLPREFIX)
  set(SYMBOLPREFIX "")
endif ()
if (NOT DEFINED SYMBOLSUFFIX)
  set(SYMBOLSUFFIX "")
endif ()

set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}")

# TODO: need to convert these Makefiles
# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake

# CORE and ARCH are quoted below so that an unset value compares as the empty
# string instead of making the if() a configure-time syntax error.
if ("${CORE}" STREQUAL "PPC440")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC")
endif ()

if ("${CORE}" STREQUAL "PPC440FP2")
  set(STATIC_ALLOCATION 1)
endif ()

# Affinity stays enabled only on Linux, and only for x86_64, x86 or the LOONGSON3B core.
if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
  set(NO_AFFINITY 1)
endif ()

if (NOT "${ARCH}" STREQUAL "x86_64" AND NOT "${ARCH}" STREQUAL "x86" AND NOT "${CORE}" STREQUAL "LOONGSON3B")
  set(NO_AFFINITY 1)
endif ()

if (NO_AFFINITY)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AFFINITY")
endif ()
# Profiling and memory-allocation related defines.
if (FUNCTION_PROFILE)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DFUNCTION_PROFILE")
endif ()

if (HUGETLB_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLB")
endif ()

if (DEFINED HUGETLBFILE_ALLOCATION)
  # The value of HUGETLBFILE_ALLOCATION becomes the HUGETLB_FILE_NAME macro.
  # (A stray ')' used to be appended to the macro value here.)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=${HUGETLBFILE_ALLOCATION}")
endif ()

if (STATIC_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_STATIC")
endif ()

if (DEVICEDRIVER_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_DEVICEDRIVER -DDEVICEDRIVER_NAME=\"/dev/mapper\"")
endif ()

if (MIXED_MEMORY_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
endif ()
# Host tool names: plain names by default, g-prefixed variants on SunOS.
set(TAR tar)
set(PATCH patch)
set(GREP grep)
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "SunOS")
  set(TAR gtar)
  set(PATCH gpatch)
  set(GREP ggrep)
endif ()

# os.cmake may already have picked a platform-specific checksum tool.
if (NOT DEFINED MD5SUM)
  set(MD5SUM md5sum)
endif ()

set(AWK awk)

# Version strings derived from the project version.
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
set(REVISION "-r${OpenBLAS_VERSION}")
# Common optimisation flags: -g is appended for DEBUG, and -O2 is used only
# when COMMON_OPT is still undefined afterwards.
if (DEBUG)
  set(COMMON_OPT "${COMMON_OPT} -g")
endif ()
if (NOT DEFINED COMMON_OPT)
  set(COMMON_OPT "-O2")
endif ()

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}")
# TODO: not sure what PFLAGS is -hpa
set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}")

set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}")
# TODO: not sure what FPFLAGS is -hpa
set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}")

#For LAPACK Fortran codes.
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}")
set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}")

#Disable -fopenmp for LAPACK Fortran codes on Windows.
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel")
  # Remove whole flags only. A substring replace of "-mp" would also cut into
  # unrelated flags and into "-openmp" / "-xopenmp=parallel" before they are matched.
  foreach (lapack_flags_var LAPACK_FFLAGS LAPACK_FPFLAGS)
    string(REPLACE " " ";" lapack_flag_items "${${lapack_flags_var}}")
    list(REMOVE_ITEM lapack_flag_items ${FILTER_FLAGS})
    string(REPLACE ";" " " ${lapack_flags_var} "${lapack_flag_items}")
  endforeach ()
  unset(lapack_flag_items)
endif ()

if ("${F_COMPILER}" STREQUAL "GFORTRAN")
  # lapack-netlib is rife with uninitialized warnings -hpa
  set(LAPACK_FFLAGS "${LAPACK_FFLAGS} -Wno-maybe-uninitialized")
endif ()

# LAPACK C flags start from the C flags assembled above
# (the previous spelling CMAKE_C_CFLAGS named a variable that is never set).
set(LAPACK_CFLAGS "${CMAKE_C_FLAGS} -DHAVE_LAPACK_CONFIG_H")
if (INTERFACE64)
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_ILP64")
endif ()

if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DOPENBLAS_OS_WINDOWS")
endif ()

# NOTE(review): CMAKE_C_COMPILER holds the compiler path, so the "LSB"
# comparison looks like it can only match if the compiler is literally named LSB - confirm the intended variable.
if ("${CMAKE_C_COMPILER}" STREQUAL "LSB" OR "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE")
endif ()
# Default object / profiled-object / static-library suffixes.
if (NOT DEFINED SUFFIX)
  set(SUFFIX o)
endif ()
if (NOT DEFINED PSUFFIX)
  set(PSUFFIX po)
endif ()
if (NOT DEFINED LIBSUFFIX)
  set(LIBSUFFIX a)
endif ()

# Library name without extension:
#   <prefix>[_<libcore>][p]<revision>
# The core name is left out for DYNAMIC_ARCH builds and "p" marks an SMP build.
if (DYNAMIC_ARCH)
  set(LIBNAME_BASE "${LIBPREFIX}")
else ()
  set(LIBNAME_BASE "${LIBPREFIX}_${LIBCORE}")
endif ()
if (DEFINED SMP)
  set(LIBNAME_BASE "${LIBNAME_BASE}p")
endif ()
set(LIBNAME_BASE "${LIBNAME_BASE}${REVISION}")

set(LIBNAME "${LIBNAME_BASE}.${LIBSUFFIX}")
set(LIBNAME_P "${LIBNAME_BASE}_p.${LIBSUFFIX}")

set(LIBDLLNAME "${LIBPREFIX}.dll")
# These names replace the static-library suffix with their own extension.
# They used to be built as "<LIBNAME>.<LIBSUFFIX>.<ext>", i.e. "....a.a.so".
set(LIBSONAME "${LIBNAME_BASE}.so")
set(LIBDYNNAME "${LIBNAME_BASE}.dylib")
set(LIBDEFNAME "${LIBNAME_BASE}.def")
set(LIBEXPNAME "${LIBNAME_BASE}.exp")
set(LIBZIPNAME "${LIBNAME_BASE}.zip")

set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}")
set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}")
# Space-separated list of the components contained in the library.
# ONLY_CBLAS overrides everything else with the single component CBLAS.
if (ONLY_CBLAS)
  set(LIB_COMPONENTS CBLAS)
else ()
  set(LIB_COMPONENTS BLAS)
  if (NOT NO_CBLAS)
    set(LIB_COMPONENTS "${LIB_COMPONENTS} CBLAS")
  endif ()
  if (NOT NO_LAPACK)
    set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACK")
  endif ()
  # LAPACKE is only listed together with LAPACK
  if (NOT NO_LAPACK AND NOT NO_LAPACKE)
    set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE")
  endif ()
endif ()
#export OSNAME
#export ARCH
#export CORE
#export LIBCORE
#export PGCPATH
#export CONFIG
#export CC
#export FC
#export BU
#export FU
#export NEED2UNDERSCORES
#export USE_THREAD
#export NUM_THREADS
#export NUM_CORES
#export SMP
#export MAKEFILE_RULE
#export NEED_PIC
#export BINARY
#export BINARY32
#export BINARY64
#export F_COMPILER
#export C_COMPILER
#export USE_OPENMP
#export CROSS
#export CROSS_SUFFIX
#export NOFORTRAN
#export NO_FBLAS
#export EXTRALIB
#export CEXTRALIB
#export FEXTRALIB
#export HAVE_SSE
#export HAVE_SSE2
#export HAVE_SSE3
#export HAVE_SSSE3
#export HAVE_SSE4_1
#export HAVE_SSE4_2
#export HAVE_SSE4A
#export HAVE_SSE5
#export HAVE_AVX
#export HAVE_VFP
#export HAVE_VFPV3
#export HAVE_VFPV4
#export HAVE_NEON
#export KERNELDIR
#export FUNCTION_PROFILE
#export TARGET_CORE
#
#export SGEMM_UNROLL_M
#export SGEMM_UNROLL_N
#export DGEMM_UNROLL_M
#export DGEMM_UNROLL_N
#export QGEMM_UNROLL_M
#export QGEMM_UNROLL_N
#export CGEMM_UNROLL_M
#export CGEMM_UNROLL_N
#export ZGEMM_UNROLL_M
#export ZGEMM_UNROLL_N
#export XGEMM_UNROLL_M
#export XGEMM_UNROLL_N
#export CGEMM3M_UNROLL_M
#export CGEMM3M_UNROLL_N
#export ZGEMM3M_UNROLL_M
#export ZGEMM3M_UNROLL_N
#export XGEMM3M_UNROLL_M
#export XGEMM3M_UNROLL_N
#if (USE_CUDA)
# export CUDADIR
# export CUCC
# export CUFLAGS
# export CULIB
#endif
#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f
#
#.f.$(SUFFIX):
# $(FC) $(FFLAGS) -c $< -o $(@F)
#
#.f.$(PSUFFIX):
# $(FC) $(FPFLAGS) -pg -c $< -o $(@F)
# these are not cross-platform
#ifdef BINARY64
#PATHSCALEPATH = /opt/pathscale/lib/3.1
#PGIPATH = /opt/pgi/linux86-64/7.1-5/lib
#else
#PATHSCALEPATH = /opt/pathscale/lib/3.1/32
#PGIPATH = /opt/pgi/linux86/7.1-5/lib
#endif
#ACMLPATH = /opt/acml/4.3.0
#ifneq ($(OSNAME), Darwin)
#MKLPATH = /opt/intel/mkl/10.2.2.025/lib
#else
#MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib
#endif
#ATLASPATH = /opt/atlas/3.9.17/opteron
#FLAMEPATH = $(HOME)/flame/lib
#ifneq ($(OSNAME), SunOS)
#SUNPATH = /opt/sunstudio12.1
#else
#SUNPATH = /opt/SUNWspro
#endif

342
cmake/utils.cmake Normal file
View File

@ -0,0 +1,342 @@
# Functions to help with the OpenBLAS build
# Imports the VARNAME=VALUE pairs printed by getarch as variables of the calling scope.
# @param GETARCH_IN the text to scan. Every NAME=VALUE token whose name and value
#                   consist only of letters, digits and underscores is imported;
#                   all other text is ignored.
function(ParseGetArchVars GETARCH_IN)
  string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" assignments "${GETARCH_IN}")
  foreach (assignment ${assignments})
    # everything before the '=' is the name, everything after it is the value
    string(REGEX REPLACE "=.*$" "" key "${assignment}")
    string(REGEX REPLACE "^[^=]*=" "" value "${assignment}")
    set(${key} ${value} PARENT_SCOPE)
  endforeach ()
endfunction ()
# Reads a Makefile into CMake vars.
# Every line of the form NAME = VALUE sets the CMake variable NAME. This is a macro, so the
# variables (and the helper variables used below) are set directly in the caller's scope.
# Lines of the form "include $(KERNELDIR)/<file>" are followed recursively.
# @param MAKEFILE_IN path of the Makefile to read
macro(ParseMakefileVars MAKEFILE_IN)
message(STATUS "Reading vars from ${MAKEFILE_IN}...")
file(STRINGS ${MAKEFILE_IN} makefile_contents)
foreach (makefile_line ${makefile_contents})
# group 1 is the variable name, group 2 is everything after the '=' up to the end of the line
string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}")
if (NOT "${line_match}" STREQUAL "")
set(var_name ${CMAKE_MATCH_1})
set(var_value ${CMAKE_MATCH_2})
# check for Makefile variables in the string, e.g. $(TSUFFIX)
string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches ${var_value})
foreach (make_var ${make_var_matches})
# strip out Makefile $() markup
string(REGEX REPLACE "\\$\\(([0-9_a-zA-Z]+)\\)" "\\1" make_var ${make_var})
# now replace the instance of the Makefile variable with the value of the CMake variable of the same name (note the double dereference ${${...}})
string(REPLACE "$(${make_var})" "${${make_var}}" var_value ${var_value})
endforeach ()
set(${var_name} ${var_value})
else ()
# not an assignment: check whether the line includes another file from the kernel directory
string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}")
if (NOT "${line_match}" STREQUAL "")
ParseMakefileVars(${KERNELDIR}/${CMAKE_MATCH_1})
endif ()
endif ()
endforeach ()
endmacro ()
# Returns all combinations of the input list, as a list with colon-separated combinations.
# E.g. input of A B C returns " " A B A:B C A:C B:C A:B:C (the empty combination is represented by a single space).
# N.B. The input is meant to be a list, and to pass a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}" "${CODES_VAR}")).
# @param list_in the items to combine
# @param absent_codes_in codes to use when an element is absent from a combination, one per element of list_in. For example, if you have TRANS;UNIT;UPPER you may want the code to be NNL when nothing is present.
# @returns LIST_OUT a list of combinations
#          CODES_OUT a list of codes corresponding to each combination: for every item of list_in either the first letter of the item (present) or its absent code (not present)
function(AllCombinations list_in absent_codes_in)
  list(LENGTH list_in list_count)
  # combinations are numbered 0 .. 2^count - 1; bit i of the number says whether item i is present
  # (foreach RANGE is inclusive, hence the - 1)
  math(EXPR last_combo "(1 << ${list_count}) - 1")
  math(EXPR last_list_index "${list_count} - 1")
  set(LIST_OUT "")
  set(CODES_OUT "")
  foreach (c RANGE 0 ${last_combo})
    set(combo_items "")
    set(current_code "")
    # an empty input has no items to look at; it only yields the empty combination
    if (list_count GREATER 0)
      foreach (list_index RANGE 0 ${last_list_index})
        math(EXPR bit "1 << ${list_index}")
        math(EXPR combo_has_bit "${c} & ${bit}")
        list(GET list_in ${list_index} list_elem)
        if (combo_has_bit)
          # Items are collected in a list and joined afterwards. Testing the
          # partially built string for truth would drop an earlier item whose
          # name is a CMake false constant (OFF, NO, N, 0, ...).
          list(APPEND combo_items "${list_elem}")
          string(SUBSTRING "${list_elem}" 0 1 code_char)
        else ()
          list(GET absent_codes_in ${list_index} code_char)
        endif ()
        set(current_code "${current_code}${code_char}")
      endforeach ()
    endif ()
    list(LENGTH combo_items combo_size)
    if (combo_size EQUAL 0)
      list(APPEND LIST_OUT " ") # Empty set is a valid combination, but CMake isn't appending the empty string for some reason, use a space
    else ()
      string(REPLACE ";" ":" current_combo "${combo_items}")
      list(APPEND LIST_OUT "${current_combo}")
    endif ()
    list(APPEND CODES_OUT ${current_code})
  endforeach ()
  set(LIST_OUT ${LIST_OUT} PARENT_SCOPE)
  set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
endfunction ()
# Generates one wrapper source per (float type, source) pair, using the BLAS naming scheme to pass the function name as preprocessor definitions.
# Each wrapper is written below ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}, #defines the name macros and #includes the real source.
# The wrapper paths are appended to OPENBLAS_SRC in the calling scope.
# Arguments are positional; only sources_in is required:
# @param sources_in the source files to build from
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
#                e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax"
# @param use_cblas (optional) if true the object name is prefixed with "cblas_" and CBLAS is defined
# @param replace_last_with (optional) replaces the last character in the filename with this string (e.g. symm_k should be symm_TU)
# @param append_with (optional) appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters)
# @param no_float_type (optional) turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
# @param complex_filename_scheme (optional) some routines have separate source files for complex and non-complex float types.
#                0 - compiles for all types
#                1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
#                2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
#                3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
#                STRING - compiles only the given type (e.g. DOUBLE)
function(GenerateNamedObjects sources_in)

  # Give every optional input a known default first: a function scope starts
  # with the caller's variables, so same-named variables of the caller would
  # otherwise be picked up as if they had been passed in.
  set(defines_in "")
  set(name_in "")
  set(use_cblas false)
  set(replace_last_with "")
  set(append_with "")
  set(no_float_type false)

  # Optional arguments are detected through ARGC. ARGV<n> beyond ARGC is not
  # reliable: when called from another function it can hold that function's argument.
  if (ARGC GREATER 1)
    set(defines_in "${ARGV1}")
  endif ()
  if (ARGC GREATER 2 AND NOT "${ARGV2}" STREQUAL "")
    # strip off extension for kernel files that pass in the object name.
    get_filename_component(name_in "${ARGV2}" NAME_WE)
  endif ()
  if (ARGC GREATER 3)
    set(use_cblas "${ARGV3}")
  endif ()
  if (ARGC GREATER 4)
    set(replace_last_with "${ARGV4}")
  endif ()
  if (ARGC GREATER 5)
    set(append_with "${ARGV5}")
  endif ()
  if (ARGC GREATER 6)
    set(no_float_type "${ARGV6}")
  endif ()

  if (no_float_type)
    set(float_list "DUMMY") # still need to loop once
  else ()
    set(float_list "${FLOAT_TYPES}")
  endif ()

  # interpret complex_filename_scheme
  set(real_only false)
  set(complex_only false)
  set(mangle_complex_sources false)
  if (ARGC GREATER 7 AND NOT "${ARGV7}" STREQUAL "")
    if ("${ARGV7}" EQUAL 1)
      set(real_only true)
    elseif ("${ARGV7}" EQUAL 2)
      set(complex_only true)
    elseif ("${ARGV7}" EQUAL 3)
      set(mangle_complex_sources true)
    elseif (NOT "${ARGV7}" EQUAL 0)
      # anything else names the float type(s) to build
      set(float_list ${ARGV7})
    endif ()
  endif ()

  if (complex_only)
    list(REMOVE_ITEM float_list "SINGLE")
    list(REMOVE_ITEM float_list "DOUBLE")
  elseif (real_only)
    list(REMOVE_ITEM float_list "COMPLEX")
    list(REMOVE_ITEM float_list "ZCOMPLEX")
  endif ()

  set(float_char "")
  set(SRC_LIST_OUT "")
  foreach (float_type ${float_list})

    # the float character is the lower-cased first letter of the type (s, d, c, z)
    if (NOT no_float_type)
      string(SUBSTRING "${float_type}" 0 1 float_char)
      string(TOLOWER "${float_char}" float_char)
    endif ()

    foreach (source_file ${sources_in})

      # NOTE(review): name_in and replace_last_with are tested for truth, so a
      # value that is a CMake false constant (e.g. "N" or "0") counts as "not given" - confirm that callers rely on this.
      if (NOT name_in)
        get_filename_component(source_name "${source_file}" NAME_WE)
        set(obj_name "${float_char}${source_name}")
      else ()
        # replace * with float_char (quoted: float_char is empty when no_float_type is set)
        if ("${name_in}" MATCHES "\\*")
          string(REPLACE "*" "${float_char}" obj_name "${name_in}")
        else ()
          set(obj_name "${float_char}${name_in}")
        endif ()
      endif ()

      if (replace_last_with)
        string(REGEX REPLACE ".$" "${replace_last_with}" obj_name "${obj_name}")
      else ()
        set(obj_name "${obj_name}${append_with}")
      endif ()

      # now add the object and set the defines
      set(obj_defines ${defines_in})

      if (use_cblas)
        set(obj_name "cblas_${obj_name}")
        list(APPEND obj_defines "CBLAS")
      endif ()

      list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")
      if ("${float_type}" STREQUAL "DOUBLE" OR "${float_type}" STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "DOUBLE")
      endif ()
      if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "COMPLEX")
        if (mangle_complex_sources)
          # add a z to the filename; only the file name component is touched,
          # so a directory that contains the same text stays as it is
          get_filename_component(source_name "${source_file}" NAME)
          get_filename_component(source_dir "${source_file}" DIRECTORY)
          if ("${source_dir}" STREQUAL "")
            set(source_file "z${source_name}")
          else ()
            set(source_file "${source_dir}/z${source_name}")
          endif ()
        endif ()
      endif ()

      if (VERBOSE_GEN)
        message(STATUS "${obj_name}:${source_file}")
        message(STATUS "${obj_defines}")
      endif ()

      # create a copy of the source to avoid duplicate obj filename problem with ar.exe
      get_filename_component(source_extension "${source_file}" EXT)
      set(new_source_file "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${obj_name}${source_extension}")
      if (IS_ABSOLUTE "${source_file}")
        set(old_source_file "${source_file}")
      else ()
        set(old_source_file "${CMAKE_CURRENT_LIST_DIR}/${source_file}")
      endif ()

      # turn "A;B=1" into "#define A\n#define B 1"
      string(REPLACE ";" "\n#define " define_source "${obj_defines}")
      string(REPLACE "=" " " define_source "${define_source}")
      set(wrapper_contents "#define ${define_source}\n#include \"${old_source_file}\"")

      # only touch the wrapper when its contents change, so that re-running
      # CMake does not make every wrapper look modified
      set(existing_contents "")
      if (EXISTS "${new_source_file}")
        file(READ "${new_source_file}" existing_contents)
      endif ()
      if (NOT "${existing_contents}" STREQUAL "${wrapper_contents}")
        file(WRITE "${new_source_file}" "${wrapper_contents}")
      endif ()

      list(APPEND SRC_LIST_OUT "${new_source_file}")

    endforeach ()
  endforeach ()

  list(APPEND OPENBLAS_SRC ${SRC_LIST_OUT})
  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
endfunction ()
# Generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in.
# The combinations (and the code letters naming each of them) come from AllCombinations; every combination is handed
# to GenerateNamedObjects once per source file.
# @param sources_in the source files to build from
# @param defines_in the preprocessor definitions that will be combined to create the object files
# @param absent_codes_in forwarded to AllCombinations together with defines_in; callers pass one entry per define
#        (e.g. "U" for "LOWER") - presumably the code letter used in the object name when that define is absent
# @param all_defines_in (optional) preprocessor definitions that will be applied to all objects
# @param replace_scheme If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k.c with TRANS and UNIT defined will be symm_TU.
#                  If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU.
#                  If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU.
#                  If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects).
#                  If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel
# @param alternate_name (optional, 6th argument) replaces the source name as the object name (define codes are still appended)
# @param no_float_type (optional, 7th argument) turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
# @param complex_filename_scheme (optional, 8th argument) see GenerateNamedObjects
function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme)

  # Optional arguments are detected through ARGC. ARGV<n> for n >= ARGC is not guaranteed to be undefined (a value can
  # leak in from an enclosing function call), so "DEFINED ARGV<n>" is not a reliable test.
  set(alternate_name_in "")
  if (ARGC GREATER 5)
    set(alternate_name_in "${ARGV5}")
  endif ()

  set(no_float_type false)
  if (ARGC GREATER 6)
    set(no_float_type "${ARGV6}")
  endif ()

  set(complex_filename_scheme "")
  if (ARGC GREATER 7)
    set(complex_filename_scheme "${ARGV7}")
  endif ()

  AllCombinations("${defines_in}" "${absent_codes_in}")
  set(define_combos ${LIST_OUT})
  set(define_codes ${CODES_OUT})

  list(LENGTH define_combos num_combos)
  math(EXPR num_combos "${num_combos} - 1")

  foreach (c RANGE 0 ${num_combos})

    list(GET define_combos ${c} define_combo)
    list(GET define_codes ${c} define_code)

    # Replace the colon separated list with semicolons, which turns it into a CMake list.
    # This is done once per combination, into a separate variable and with the input quoted: re-running the
    # replacement on an already converted list would expand it into several inputs, which string(REPLACE)
    # concatenates ("A;B" -> "AB"), corrupting the defines of every source after the first one.
    string(REPLACE ":" ";" combo_defines "${define_combo}")

    # a single space stands for the empty combination, which contributes no defines of its own
    if ("${combo_defines}" STREQUAL " ")
      set(cur_defines "${all_defines_in}")
    else ()
      set(cur_defines ${combo_defines})
      list(APPEND cur_defines ${all_defines_in})
    endif ()

    foreach (source_file ${sources_in})

      # scheme 4 rewrites the name per source, so start from the caller's value every time
      set(alternate_name "${alternate_name_in}")

      set(replace_code "")
      set(append_code "")
      if (replace_scheme EQUAL 1)
        set(replace_code "${define_code}")
      elseif (replace_scheme EQUAL 2)
        set(append_code "_${define_code}")
      elseif (replace_scheme EQUAL 3)
        # first extract the last letter of the object name (or of the source file name, just before the extension)
        if ("${alternate_name}" STREQUAL "")
          string(REGEX MATCH "[a-zA-Z]\\." last_letter "${source_file}")
        else ()
          string(REGEX MATCH "[a-zA-Z]$" last_letter "${alternate_name}")
        endif ()
        if ("${last_letter}" STREQUAL "")
          message(FATAL_ERROR "GenerateCombinationObjects: replace_scheme 3 requires a name ending in a letter (source '${source_file}', alternate name '${alternate_name}')")
        endif ()
        string(SUBSTRING "${last_letter}" 0 1 last_letter) # remove period from match
        # break the code up into the first letter and the remaining (should only be 2 anyway)
        string(SUBSTRING "${define_code}" 0 1 define_code_first)
        string(SUBSTRING "${define_code}" 1 -1 define_code_second)
        set(replace_code "${define_code_first}${last_letter}${define_code_second}")
      elseif (replace_scheme EQUAL 4)
        # insert code before the last underscore and pass that in as the alternate_name
        if ("${alternate_name}" STREQUAL "")
          get_filename_component(alternate_name "${source_file}" NAME_WE)
        endif ()
        # check if filename has two underscores, insert another if not (e.g. getrs_parallel needs to become getrs_U_parallel not getrsU_parallel)
        set(extra_underscore "")
        string(REGEX MATCH "_[a-zA-Z]+_" underscore_match "${alternate_name}")
        string(LENGTH "${underscore_match}" underscore_match_length)
        if (underscore_match_length EQUAL 0)
          set(extra_underscore "_")
        endif ()
        string(REGEX REPLACE "(.+)(_[^_]+)$" "\\1${extra_underscore}${define_code}\\2" alternate_name "${alternate_name}")
      else ()
        set(append_code "${define_code}") # replace_scheme should be 0
      endif ()

      GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" false "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}")
    endforeach ()
  endforeach ()

  # GenerateNamedObjects updated OPENBLAS_SRC in this scope; hand the accumulated list on to our caller
  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
endfunction ()

View File

@ -82,7 +82,10 @@ extern "C" {
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if !defined(_MSC_VER)
#include <unistd.h>
#endif
#ifdef OS_LINUX
#include <malloc.h>
@ -313,8 +316,12 @@ typedef int blasint;
#endif
#if defined(OS_WINDOWS)
#if defined(_MSC_VER) && !defined(__clang__)
#define YIELDING YieldProcessor()
#else
#define YIELDING SwitchToThread()
#endif
#endif
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");

View File

@ -47,12 +47,12 @@ double dsdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
float _Complex cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
float _Complex cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
double _Complex zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
double _Complex zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
xdouble _Complex xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
xdouble _Complex xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
openblas_complex_float cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
openblas_complex_double zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);

View File

@ -56,41 +56,65 @@ static void __inline blas_lock(volatile BLASULONG *address){
do {
while (*address) {YIELDING;};
#if defined(_MSC_VER) && !defined(__clang__)
// use intrinsic instead of inline assembly
ret = _InterlockedExchange(address, 1);
// inline assembly
/*__asm {
mov eax, address
mov ebx, 1
xchg [eax], ebx
mov ret, ebx
}*/
#else
__asm__ __volatile__(
"xchgl %0, %1\n"
: "=r"(ret), "=m"(*address)
: "0"(1), "m"(*address)
: "memory");
#endif
} while (ret);
}
static __inline unsigned long long rpcc(void){
#if defined(_MSC_VER) && !defined(__clang__)
return __rdtsc(); // use MSVC intrinsic
#else
unsigned int a, d;
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
return ((unsigned long long)a + ((unsigned long long)d << 32));
#endif
};
static __inline unsigned long getstackaddr(void){
#if defined(_MSC_VER) && !defined(__clang__)
return (unsigned long)_ReturnAddress(); // use MSVC intrinsic
#else
unsigned long addr;
__asm__ __volatile__ ("mov %%esp, %0"
: "=r"(addr) : : "memory");
return addr;
#endif
};
static __inline long double sqrt_long(long double val) {
#if defined(_MSC_VER) && !defined(__clang__)
return sqrt(val); // not sure if this will use fsqrt
#else
long double result;
__asm__ __volatile__ ("fldt %1\n"
"fsqrt\n"
"fstpt %0\n" : "=m" (result) : "m"(val));
return result;
#endif
}
#define SQRT(a) sqrt_long(a)
@ -100,7 +124,7 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
#define WHEREAMI
static inline int WhereAmI(void){
static __inline int WhereAmI(void){
int eax, ebx, ecx, edx;
int apicid;
@ -146,9 +170,14 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
y = blas_quick_divide_table[y];
#if defined(_MSC_VER) && !defined(__clang__)
(void*)result;
return x*y;
#else
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
return result;
#endif
}
#endif
@ -284,8 +313,12 @@ REALNAME:
#define PROFCODE
#ifdef __clang__
#define EPILOGUE .end
#else
#define EPILOGUE .end REALNAME
#endif
#endif
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__)
#define PROLOGUE \

View File

@ -39,6 +39,10 @@
#ifndef CPUID_H
#define CPUID_H
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
#define INTEL_AMD
#endif
#define VENDOR_INTEL 1
#define VENDOR_UMC 2
#define VENDOR_AMD 3
@ -59,7 +63,7 @@
#define FAMILY_PM 7
#define FAMILY_IA64 8
#if defined(__i386__) || defined(__x86_64__)
#ifdef INTEL_AMD
#define GET_EXFAMILY 1
#define GET_EXMODEL 2
#define GET_TYPE 3

View File

@ -40,6 +40,12 @@
#include <string.h>
#include "cpuid.h"
#if defined(_MSC_VER) && !defined(__clang__)
#define C_INLINE __inline
#else
#define C_INLINE inline
#endif
/*
#ifdef NO_AVX
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
@ -53,12 +59,26 @@
#endif
*/
#if defined(_MSC_VER) && !defined(__clang__)
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
int cpuInfo[4] = {-1};
__cpuid(cpuInfo, op);
*eax = cpuInfo[0];
*ebx = cpuInfo[1];
*ecx = cpuInfo[2];
*edx = cpuInfo[3];
}
#else
#ifndef CPUIDEMU
#if defined(__APPLE__) && defined(__i386__)
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
#else
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
__asm__ __volatile__
("mov %%ebx, %%edi;"
@ -115,14 +135,16 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
#endif
static inline int have_cpuid(void){
#endif // _MSC_VER
static C_INLINE int have_cpuid(void){
int eax, ebx, ecx, edx;
cpuid(0, &eax, &ebx, &ecx, &edx);
return eax;
}
static inline int have_excpuid(void){
static C_INLINE int have_excpuid(void){
int eax, ebx, ecx, edx;
cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
@ -130,10 +152,14 @@ static inline int have_excpuid(void){
}
#ifndef NO_AVX
static inline void xgetbv(int op, int * eax, int * edx){
static C_INLINE void xgetbv(int op, int * eax, int * edx){
//Use binary code for xgetbv
#if defined(_MSC_VER) && !defined(__clang__)
*eax = __xgetbv(op);
#else
__asm__ __volatile__
(".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
#endif
}
#endif

View File

@ -0,0 +1,129 @@
# Build description for the driver_level2 object library.
# Every GenerateNamedObjects / GenerateCombinationObjects call below contributes entries to OPENBLAS_SRC, which is
# consumed by the add_library() call at the end of this file.
include_directories(${CMAKE_SOURCE_DIR})
# sources that need to be compiled twice, once with no flags and once with LOWER
set(UL_SOURCES
sbmv_k.c
spmv_k.c
spr_k.c
spr2_k.c
syr_k.c
syr2_k.c
)
# sources that need to be compiled several times, for UNIT, TRANSA
# (the _U file variants)
set(U_SOURCES
trmv_U.c
tbmv_U.c
tbsv_U.c
tpmv_U.c
tpsv_U.c
trsv_U.c
)
# the _L file variants of the same routines
set(L_SOURCES
trmv_L.c
tbmv_L.c
tbsv_L.c
tpmv_L.c
tpsv_L.c
trsv_L.c
)
# threaded sources that are compiled with and without LOWER (only used when SMP is set, see the end of this file)
set(UL_SMP_SOURCES
symv_thread.c
syr_thread.c
syr2_thread.c
spr_thread.c
spr2_thread.c
spmv_thread.c
sbmv_thread.c
)
# threaded sources that are combined with TRANSA, LOWER and UNIT (only used when SMP is set)
set(NU_SMP_SOURCES
trmv_thread.c
tpmv_thread.c
tbmv_thread.c
)
# objects that need LOWER set
# (the trailing 3 is forwarded to GenerateNamedObjects as the complex filename scheme)
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3)
# gbmv uses a lowercase n and t
GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3)
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3)
# special defines for complex
# The remaining objects are generated one float type at a time; the float type is passed as the last argument.
foreach (float_type ${FLOAT_TYPES})
# threaded gemv/gbmv: a plain and a TRANSA object for every float type
if (SMP)
GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false ${float_type})
GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false ${float_type})
GenerateNamedObjects("gbmv_thread.c" "" "gbmv_thread_n" false "" "" false ${float_type})
GenerateNamedObjects("gbmv_thread.c" "TRANSA" "gbmv_thread_t" false "" "" false ${float_type})
endif ()
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
# Complex builds use the z-prefixed sources and a numeric TRANSA (1-4); each call also combines UNIT on/off.
# Note that the _U sources supply the NU/TL/RU/CL objects.
foreach (u_source ${U_SOURCES})
# op_name is the leading run of lowercase letters of the file name, e.g. "trmv" for trmv_U.c
string(REGEX MATCH "[a-z]+" op_name ${u_source})
GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NU" false ${float_type})
GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TL" false ${float_type})
GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RU" false ${float_type})
GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CL" false ${float_type})
endforeach ()
# ... and the _L sources supply the NL/TU/RL/CU objects
foreach (l_source ${L_SOURCES})
string(REGEX MATCH "[a-z]+" op_name ${l_source})
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NL" false ${float_type})
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TU" false ${float_type})
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RL" false ${float_type})
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type})
endforeach ()
if (SMP)
# additional threaded gemv/gbmv objects for the CONJ / XCONJ / TRANSA define combinations
GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type})
GenerateNamedObjects("gemv_thread.c" "CONJ;TRANSA" "gemv_thread_c" false "" "" false ${float_type})
GenerateNamedObjects("gemv_thread.c" "XCONJ" "gemv_thread_o" false "" "" false ${float_type})
GenerateNamedObjects("gemv_thread.c" "XCONJ;TRANSA" "gemv_thread_u" false "" "" false ${float_type})
GenerateNamedObjects("gemv_thread.c" "XCONJ;CONJ" "gemv_thread_s" false "" "" false ${float_type})
GenerateNamedObjects("gemv_thread.c" "XCONJ;CONJ;TRANSA" "gemv_thread_d" false "" "" false ${float_type})
GenerateNamedObjects("gbmv_thread.c" "CONJ" "gbmv_thread_r" false "" "" false ${float_type})
GenerateNamedObjects("gbmv_thread.c" "CONJ;TRANSA" "gbmv_thread_c" false "" "" false ${float_type})
GenerateNamedObjects("gbmv_thread.c" "XCONJ" "gbmv_thread_o" false "" "" false ${float_type})
GenerateNamedObjects("gbmv_thread.c" "XCONJ;TRANSA" "gbmv_thread_u" false "" "" false ${float_type})
GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ" "gbmv_thread_s" false "" "" false ${float_type})
GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ;TRANSA" "gbmv_thread_d" false "" "" false ${float_type})
# threaded ger: one object per CONJ / XCONJ combination
GenerateNamedObjects("ger_thread.c" "" "ger_thread_U" false "" "" false ${float_type})
GenerateNamedObjects("ger_thread.c" "CONJ" "ger_thread_C" false "" "" false ${float_type})
GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type})
GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type})
foreach (nu_smp_src ${NU_SMP_SOURCES})
# op_name is "<op>_<suffix>", e.g. "trmv_thread" for trmv_thread.c
string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src})
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type})
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=2" 0 "${op_name}_T" false ${float_type})
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=3" 0 "${op_name}_R" false ${float_type})
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=4" 0 "${op_name}_C" false ${float_type})
endforeach ()
endif ()
else ()
# Non-complex float types: TRANSA and UNIT are plain on/off defines that take part in the combinations.
# N.B. BLAS wants to put the U/L from the filename in the *MIDDLE*
GenerateCombinationObjects("${U_SOURCES};${L_SOURCES}" "TRANSA;UNIT" "N;N" "" 3 "" false ${float_type})
if (SMP)
GenerateNamedObjects("ger_thread.c" "" "" false "" "" false ${float_type})
GenerateCombinationObjects("${NU_SMP_SOURCES}" "TRANSA;LOWER;UNIT" "N;U;N" "" 2 "" false ${float_type})
endif ()
endif ()
endforeach ()
# threaded upper/lower objects; no float type argument is passed here, unlike the calls inside the loop above
if (SMP)
GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2)
endif ()
# bundle everything collected in OPENBLAS_SRC into an object library
add_library(driver_level2 OBJECT ${OPENBLAS_SRC})

View File

@ -0,0 +1,81 @@
# Build description for the driver_level3 object library.
# Every GenerateNamedObjects / GenerateCombinationObjects call below contributes entries to OPENBLAS_SRC, which is
# consumed by the add_library() call at the end of this file.
include_directories(${CMAKE_SOURCE_DIR})

# Flag the architectures listed below as GEMM3M candidates.
# NOTE(review): USE_GEMM3M is not read again in this file - confirm whether the gemm3m objects are still to be ported.
set(USE_GEMM3M 0)
if (DEFINED ARCH)
  # quoted so that a defined-but-empty ARCH cannot produce a malformed if() condition
  if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64" OR "${ARCH}" STREQUAL "ia64" OR "${ARCH}" STREQUAL "MIPS")
    set(USE_GEMM3M 1)
  endif ()
endif ()

# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa

# loop through gemm.c defines
set(GEMM_DEFINES NN NT TN TT)
set(GEMM_COMPLEX_DEFINES RN CN RT CT NR TR RR CR NC TC RC CC)
foreach (GEMM_DEFINE ${GEMM_DEFINES})
  string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC)
  GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0)
  if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
    GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0)
  endif ()
endforeach ()

# triangular / symmetric drivers, one object per combination of the listed defines
GenerateCombinationObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "N;L;N" "" 0)
GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "NN" 1)
GenerateCombinationObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "U;N" "" 1)
GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "U" "" 2)

if (SMP)
  # N.B. these do NOT have a float type (e.g. DOUBLE) defined!
  GenerateNamedObjects("gemm_thread_m.c;gemm_thread_n.c;gemm_thread_mn.c;gemm_thread_variable.c;syrk_thread.c" "" "" 0 "" "" 1)
  if (NOT USE_SIMPLE_THREADED_LEVEL3)
    GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "THREADED_LEVEL3" 2 "syrk_thread")
    GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "THREADED_LEVEL3;NN" 2 "symm_thread")
  endif ()
endif ()

# complex-only objects, generated one float type at a time
foreach (float_type ${FLOAT_TYPES})
  if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
    GenerateCombinationObjects("zherk_kernel.c" "LOWER;CONJ" "U;N" "HERK" 2 "herk_kernel" false ${float_type})
    # TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination
    GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type})
    GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type})
    # The THREADED_LEVEL3 variants are gated exactly like the gemm_thread, syrk_thread and symm_thread objects
    # in this file, so that they are not generated for builds without threaded level3 support.
    if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
      GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type})
      GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type})
    endif ()
    # Need to set CONJ for trmm and trsm
    GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type})
    GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trmm_LC" false ${float_type})
    GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_RR" false ${float_type})
    GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trmm_RC" false ${float_type})
    GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_LR" false ${float_type})
    GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type})
    GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type})
    GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type})
    # special gemm defines for complex
    foreach (gemm_define ${GEMM_COMPLEX_DEFINES})
      string(TOLOWER ${gemm_define} gemm_define_LC)
      GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type})
      if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
        GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type})
      endif ()
    endforeach ()
  endif ()
endforeach ()

# Not ported from the makefile yet:
#HPLOBJS =
# dgemm_nn.c dgemm_nt.c dgemm_tn.c dgemm_tt.c
# dtrsm_LNUU.c dtrsm_LNUN.c dtrsm_LNLU.c dtrsm_LNLN.c
# dtrsm_LTUU.c dtrsm_LTUN.c dtrsm_LTLU.c dtrsm_LTLN.c
# dtrsm_RNUU.c dtrsm_RNUN.c dtrsm_RNLU.c dtrsm_RNLN.c
# dtrsm_RTUU.c dtrsm_RTUN.c dtrsm_RTLU.c dtrsm_RTLN.c
#
#if (USE_SIMPLE_THREADED_LEVEL3)
# HPLOBJS += dgemm_thread_nn.c dgemm_thread_nt.c
# dgemm_thread_tn.c dgemm_thread_tt.c
#endif
#

# bundle everything collected in OPENBLAS_SRC into an object library
add_library(driver_level3 OBJECT ${OPENBLAS_SRC})

View File

@ -47,7 +47,7 @@
#endif
#endif
static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) {
static __inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) {
BLASLONG i;

View File

@ -49,7 +49,7 @@
#endif
#endif
static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) {
static __inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) {
BLASLONG i;

View File

@ -70,6 +70,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
BLASLONG ls, is, js;
BLASLONG min_l, min_i, min_j;
BLASLONG jjs, min_jj;
#if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA)))
BLASLONG start_ls;
#endif
m = args -> m;
n = args -> n;
@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
}
#else
BLASLONG start_ls;
for(js = n; js > 0; js -= GEMM_R){
min_j = js;
if (min_j > GEMM_R) min_j = GEMM_R;

View File

@ -76,6 +76,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
BLASLONG ls, is, js;
BLASLONG min_l, min_i, min_j;
BLASLONG jjs, min_jj;
#if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA)))
BLASLONG start_is;
#endif
m = args -> m;
n = args -> n;
@ -178,8 +181,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
}
}
#else
BLASLONG start_is;
for(ls = m; ls > 0; ls -= GEMM_Q){
min_l = ls;
if (min_l > GEMM_Q) min_l = GEMM_Q;

View File

@ -75,6 +75,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
BLASLONG ls, is, js;
BLASLONG min_l, min_i, min_j;
BLASLONG jjs, min_jj;
#if !((defined(UPPER) && !defined(TRANSA)) || (!defined(UPPER) && defined(TRANSA)))
BLASLONG start_ls;
#endif
m = args -> m;
n = args -> n;
@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
}
#else
BLASLONG start_ls;
for(js = n; js > 0; js -= GEMM_R){
min_j = js;
if (min_j > GEMM_R) min_j = GEMM_R;

View File

@ -0,0 +1,73 @@
# Build description for the driver_others object library: memory management, the threading server and
# miscellaneous support sources.
include_directories(${CMAKE_SOURCE_DIR})

# Pick the memory allocator source.
# CORE is quoted so that an unset or empty CORE compares as an empty string instead of producing a malformed if().
if ("${CORE}" STREQUAL "PPC440")
  set(MEMORY memory_qalloc.c)
else ()
  set(MEMORY memory.c)
endif ()

if (SMP)

  # Pick the threading backend: OpenMP if requested, otherwise the win32 server on Windows,
  # otherwise the default blas_server.c.
  if (USE_OPENMP)
    set(BLAS_SERVER blas_server_omp.c)
  elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
    set(BLAS_SERVER blas_server_win32.c)
  endif ()

  if (NOT DEFINED BLAS_SERVER)
    set(BLAS_SERVER blas_server.c)
  endif ()

  set(SMP_SOURCES
    ${BLAS_SERVER}
    divtable.c # TODO: Makefile has -UDOUBLE
    blas_l1_thread.c
  )

  if (NOT NO_AFFINITY)
    list(APPEND SMP_SOURCES init.c)
  endif ()
endif ()

# sources that are compiled as they are, without any generated defines
set(COMMON_SOURCES
  xerbla.c
  openblas_set_num_threads.c
  openblas_error_handle.c
)

# these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling
GenerateNamedObjects("abs.c" "" "c_abs" 0 "" "" 1 )
GenerateNamedObjects("abs.c" "DOUBLE" "z_abs" 0 "" "" 1)
GenerateNamedObjects("openblas_get_config.c;openblas_get_parallel.c" "" "" 0 "" "" 1)

if (DYNAMIC_ARCH)
  list(APPEND COMMON_SOURCES dynamic.c)
else ()
  list(APPEND COMMON_SOURCES parameter.c)
endif ()

# Not ported from the makefile yet:
#ifdef EXPRECISION
#COMMONOBJS += x_abs.$(SUFFIX) qlamch.$(SUFFIX) qlamc3.$(SUFFIX)
#endif
#
#ifdef QUAD_PRECISION
#COMMONOBJS += addx.$(SUFFIX) mulx.$(SUFFIX)
#endif
#
#ifdef USE_CUDA
#COMMONOBJS += cuda_init.$(SUFFIX)
#endif
#
#ifdef FUNCTION_PROFILE
#COMMONOBJS += profile.$(SUFFIX)
#endif
#LIBOTHERS = libothers.$(LIBSUFFIX)
#ifeq ($(DYNAMIC_ARCH), 1)
#HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
#else
#HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
#endif

# the generated sources (OPENBLAS_SRC) plus the plain sources selected above
add_library(driver_others OBJECT ${OPENBLAS_SRC} ${MEMORY} ${SMP_SOURCES} ${COMMON_SOURCES})

View File

@ -139,8 +139,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
#if defined(_MSC_VER) && !defined(__clang__)
#define CONSTRUCTOR __cdecl
#define DESTRUCTOR __cdecl
#else
#define CONSTRUCTOR __attribute__ ((constructor))
#define DESTRUCTOR __attribute__ ((destructor))
#endif
#ifdef DYNAMIC_ARCH
gotoblas_t *gotoblas = NULL;
@ -795,12 +800,12 @@ static void *alloc_hugetlb(void *address){
if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) {
CloseHandle(hToken);
return -1;
return (void*)-1;
}
if (AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL) != TRUE) {
CloseHandle(hToken);
return -1;
return (void*)-1;
}
map_address = (void *)VirtualAlloc(address,
@ -1399,6 +1404,28 @@ void DESTRUCTOR gotoblas_quit(void) {
#endif
}
#if defined(_MSC_VER) && !defined(__clang__)
BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved)
{
switch (ul_reason_for_call)
{
case DLL_PROCESS_ATTACH:
gotoblas_init();
break;
case DLL_THREAD_ATTACH:
break;
case DLL_THREAD_DETACH:
break;
case DLL_PROCESS_DETACH:
gotoblas_quit();
break;
default:
break;
}
return TRUE;
}
#endif
#if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
/* Don't call me; this is just work around for PGI / Sun bug */
void gotoblas_dummy_for_PGI(void) {

View File

@ -69,10 +69,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__)
#if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64)
#define OS_WINDOWS
#endif
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
#define INTEL_AMD
#endif
#include <stdio.h>
#include <string.h>
#ifdef OS_WINDOWS
@ -829,7 +833,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define OPENBLAS_SUPPORTED
#endif
#if defined(__i386__) || (__x86_64__)
#ifdef INTEL_AMD
#include "cpuid_x86.c"
#define OPENBLAS_SUPPORTED
#endif
@ -924,7 +928,7 @@ int main(int argc, char *argv[]){
#ifdef FORCE
printf("CORE=%s\n", CORENAME);
#else
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
printf("CORE=%s\n", get_corename());
#endif
#endif
@ -944,7 +948,7 @@ int main(int argc, char *argv[]){
#endif
#if defined(__i386__) || defined(__x86_64__)
#ifdef INTEL_AMD
#ifndef FORCE
get_sse();
#else
@ -1024,7 +1028,7 @@ int main(int argc, char *argv[]){
#ifdef FORCE
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME);
#else
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__)
printf("#define CHAR_CORENAME \"%s\"\n", get_corename());
#endif
#endif

120
interface/CMakeLists.txt Normal file
View File

@ -0,0 +1,120 @@
# Build description for the interface object library (the BLAS / CBLAS / LAPACK entry points).
# Every GenerateNamedObjects call below contributes entries to OPENBLAS_SRC, which is consumed by the
# add_library() call at the end of this file.
include_directories(${CMAKE_SOURCE_DIR})

set(BLAS1_SOURCES
  copy.c
  asum.c nrm2.c
)

set(BLAS1_REAL_ONLY_SOURCES
  rotm.c rotmg.c # N.B. these do not have complex counterparts
)

# these will have 'z' prepended for the complex version
set(BLAS1_MANGLED_SOURCES
  axpy.c swap.c
  scal.c
  dot.c
  rot.c rotg.c
  axpby.c
)

# TODO: USE_NETLIB_GEMV should switch gemv.c to netlib/*gemv.f
# these all have 'z' sources for complex versions
set(BLAS2_SOURCES
  gemv.c ger.c
  trsv.c trmv.c symv.c
  syr.c syr2.c gbmv.c
  sbmv.c spmv.c
  spr.c spr2.c
  tbsv.c tbmv.c
  tpsv.c tpmv.c
)

# these do not have separate 'z' sources
set(BLAS3_SOURCES
  gemm.c symm.c
  trsm.c syrk.c syr2k.c
)

set(BLAS3_MANGLED_SOURCES
  omatcopy.c imatcopy.c
)

# generate the BLAS objs once with and once without cblas
# (flag 0 = plain interface, flag 1 = CBLAS interface; each pass is skipped when NO_FBLAS / NO_CBLAS is defined)
set (CBLAS_FLAGS "")
if (NOT DEFINED NO_FBLAS)
  list(APPEND CBLAS_FLAGS 0)
endif ()
if (NOT DEFINED NO_CBLAS)
  list(APPEND CBLAS_FLAGS 1)
endif ()

foreach (CBLAS_FLAG ${CBLAS_FLAGS})

  # TODO: don't compile complex sources with cblas for now, the naming schemes are all different and they will have to be handled separately from SINGLE/DOUBLE
  # For the CBLAS pass both values become 1; for the plain pass nothing is disabled and scheme 3 is used
  # for the sources that need 'z' prepended.
  set(DISABLE_COMPLEX 0)
  set(MANGLE_COMPLEX 3)
  if (CBLAS_FLAG EQUAL 1)
    set(DISABLE_COMPLEX 1)
    set(MANGLE_COMPLEX 1)
  endif ()

  GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
  GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1)
  GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
  GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
  GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
  GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})

  # trmm is trsm with a compiler flag set
  GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG})

  # max and imax are compiled 4 times
  GenerateNamedObjects("max.c" "" "" ${CBLAS_FLAG})
  GenerateNamedObjects("max.c" "USE_ABS" "amax" ${CBLAS_FLAG})
  GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "amin" ${CBLAS_FLAG})
  GenerateNamedObjects("max.c" "USE_MIN" "min" ${CBLAS_FLAG})

  # the '*' in the object name marks where the float type character is substituted
  GenerateNamedObjects("imax.c" "" "i*max" ${CBLAS_FLAG})
  GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" ${CBLAS_FLAG})
  GenerateNamedObjects("imax.c" "USE_ABS;USE_MIN" "i*amin" ${CBLAS_FLAG})
  GenerateNamedObjects("imax.c" "USE_MIN" "i*min" ${CBLAS_FLAG})

endforeach ()

# complex-specific sources
foreach (float_type ${FLOAT_TYPES})

  if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
    GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type})
    GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type})
  endif ()
  if ("${float_type}" STREQUAL "COMPLEX")
    GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX")
    GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX")
  endif ()
  if ("${float_type}" STREQUAL "ZCOMPLEX")
    GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX")
    GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX")
  endif ()
endforeach ()

# Tested by value, like the top-level CMakeLists.txt does when it decides whether to add the lapack
# subdirectory; with a DEFINED test, -DNO_LAPACK=0 would build lapack but drop these interface objects.
if (NOT NO_LAPACK)
  set(LAPACK_SOURCES
    lapack/gesv.c
  )

  # prepend z for complex versions
  set(LAPACK_MANGLED_SOURCES
    lapack/getrf.c lapack/getrs.c lapack/potrf.c lapack/getf2.c
    lapack/potf2.c lapack/laswp.c lapack/lauu2.c
    lapack/lauum.c lapack/trti2.c lapack/trtri.c
  )

  GenerateNamedObjects("${LAPACK_SOURCES}")
  GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3)
endif ()

# bundle everything collected in OPENBLAS_SRC into an object library
add_library(interface OBJECT ${OPENBLAS_SRC})

View File

@ -121,6 +121,9 @@ void NAME(char *TRANSA, char *TRANSB,
FLOAT *sa, *sb;
#ifdef SMP
int nthreads_max;
int nthreads_avail;
double MNK;
#ifndef COMPLEX
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_REAL;
@ -237,6 +240,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
XFLOAT *sa, *sb;
#ifdef SMP
int nthreads_max;
int nthreads_avail;
double MNK;
#ifndef COMPLEX
#ifdef XDOUBLE
int mode = BLAS_XDOUBLE | BLAS_REAL;
@ -400,15 +406,15 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
mode |= (transa << BLAS_TRANSA_SHIFT);
mode |= (transb << BLAS_TRANSB_SHIFT);
int nthreads_max = num_cpu_avail(3);
int nthreads_avail = nthreads_max;
nthreads_max = num_cpu_avail(3);
nthreads_avail = nthreads_max;
#ifndef COMPLEX
double MNK = (double) args.m * (double) args.n * (double) args.k;
MNK = (double) args.m * (double) args.n * (double) args.k;
if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
nthreads_max = 1;
#else
double MNK = (double) args.m * (double) args.n * (double) args.k;
MNK = (double) args.m * (double) args.n * (double) args.k;
if ( MNK <= (8192.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
nthreads_max = 1;
#endif

View File

@ -81,6 +81,9 @@ void NAME(char *TRANS, blasint *M, blasint *N,
FLOAT *buffer;
#ifdef SMP
int nthreads;
int nthreads_max;
int nthreads_avail;
double MNK;
#endif
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
@ -135,6 +138,9 @@ void CNAME(enum CBLAS_ORDER order,
blasint info, t;
#ifdef SMP
int nthreads;
int nthreads_max;
int nthreads_avail;
double MNK;
#endif
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
@ -235,10 +241,10 @@ void CNAME(enum CBLAS_ORDER order,
#ifdef SMP
int nthreads_max = num_cpu_avail(2);
int nthreads_avail = nthreads_max;
nthreads_max = num_cpu_avail(2);
nthreads_avail = nthreads_max;
double MNK = (double) m * (double) n;
MNK = (double) m * (double) n;
if ( MNK <= (24.0 * 24.0 * (double) (GEMM_MULTITHREAD_THRESHOLD*GEMM_MULTITHREAD_THRESHOLD) ) )
nthreads_max = 1;

254
kernel/CMakeLists.txt Normal file
View File

@ -0,0 +1,254 @@
# Kernel build setup: selects the target core, loads the per-core KERNEL
# variable files and registers the objects that are independent of the
# precision loop further down.
include_directories(${CMAKE_SOURCE_DIR})
include("${CMAKE_SOURCE_DIR}/cmake/kernel.cmake")

# Makefile
if (DEFINED TARGET_CORE)
  # Original Makefile line, kept for reference:
  #   override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
  set(BUILD_KERNEL 1)
  set(KDIR "")
  set(TSUFFIX "_${TARGET_CORE}")
else ()
  # No explicit core requested: fall back to CORE, with no name suffix.
  set(TARGET_CORE ${CORE})
  set(KDIR "")
  set(TSUFFIX "")
endif ()

# NOTE(review): SetDefaultL1/ParseMakefileVars are defined elsewhere; they
# presumably populate the *KERNEL variables used below - confirm in cmake/.
# The core-specific file is parsed before the generic KERNEL file.
SetDefaultL1()
ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}")
ParseMakefileVars("${KERNELDIR}/KERNEL")

# ARCH is quoted so that an empty or unset value compares as a plain string
# instead of producing a malformed if() expression.
if ("${ARCH}" STREQUAL "x86")
  GenerateNamedObjects("${KERNELDIR}/cpuid.S" "" "" false "" "" true)
endif ()

# don't use float type name mangling here
GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "F_INTERFACE" "lsame" false "" "" true)
GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "COMPLEX;F_INTERFACE" "scabs1" false "" "" true)
GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE;COMPLEX;F_INTERFACE" "dcabs1" false "" "" true)
# Makefile.L1
# Level-1 kernels: one set of objects per enabled precision in FLOAT_TYPES.
foreach (float_type ${FLOAT_TYPES})
  # a bit of metaprogramming here to pull out the appropriate KERNEL var:
  # the first letter of the float type selects e.g. SAMAXKERNEL / DAMAXKERNEL.
  string(SUBSTRING "${float_type}" 0 1 float_char)

  GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type})
  # The plain max/min kernels are only built when a kernel variable exists for
  # this precision.
  if (DEFINED ${float_char}MAXKERNEL)
    GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type})
  endif ()
  if (DEFINED ${float_char}MINKERNEL)
    GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "" "min_k" false "" "" false ${float_type})
  endif ()

  # Index-returning variants; the object names carry a "*" placeholder.
  GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type})
  if (DEFINED I${float_char}MAXKERNEL)
    GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type})
  endif ()
  if (DEFINED I${float_char}MINKERNEL)
    GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "" "i*min_k" false "" "" false ${float_type})
  endif ()

  GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}NRM2KERNEL}" "" "nrm2_k" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "rot_k" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}SCALKERNEL}" "" "scal_k" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}SWAPKERNEL}" "" "swap_k" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPBYKERNEL}" "" "axpby_k" false "" "" false ${float_type})

  # Complex precisions get conjugated axpy plus dotu/dotc; real ones a single dot.
  # The operand is quoted so it is compared as a string, not as a variable name.
  if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
    GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dotu_k" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "CONJ" "dotc_k" false "" "" false ${float_type})
  else ()
    GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type})
  endif ()
endforeach ()
# Makefile.L2
# Level-2 kernels: generic symv/ger objects, then per-precision ger/gemv kernels.
SetDefaultL2()
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3)
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)

foreach (float_type ${FLOAT_TYPES})
  string(SUBSTRING "${float_type}" 0 1 float_char)

  # The operand is quoted so it is compared as a string, not as a variable name.
  if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
    # Complex ger: the U/C kernels are each built without and with XCONJ.
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ" "gerc_k" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "XCONJ" "gerv_k" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ;XCONJ" "gerd_k" false "" "" false ${float_type})

    # Complex gemv: the N and T kernels are compiled for every CONJ/XCONJ
    # combination (note the define here is TRANSA, not TRANS as for real types).
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type})
  else ()
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type})
  endif ()
endforeach ()
# Makefile.L3
# Feature switches for the level-3 kernels, both off unless enabled below.
#   USE_GEMM3M - set for the x86, x86_64, ia64 and MIPS architectures.
#   USE_TRMM   - set for arm/arm64 and for the LOONGSON3B and GENERIC targets.
set(USE_GEMM3M false)
set(USE_TRMM false)

# ARCH/TARGET are quoted so that an empty or unset value compares as a plain
# string instead of producing a malformed if() expression.
if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64" OR "${ARCH}" STREQUAL "ia64" OR "${ARCH}" STREQUAL "MIPS")
  set(USE_GEMM3M true)
endif ()

# The target is spelled LOONGSON3B; the former "LONGSOON3B" spelling could
# never match it.
if ("${ARCH}" STREQUAL "arm" OR "${ARCH}" STREQUAL "arm64" OR "${TARGET}" STREQUAL "LOONGSON3B" OR "${TARGET}" STREQUAL "GENERIC")
  set(USE_TRMM true)
endif ()
# Level-3 kernels: gemm/trmm/trsm kernels plus the generic copy routines,
# generated once per enabled precision in FLOAT_TYPES.
foreach (float_type ${FLOAT_TYPES})
  string(SUBSTRING "${float_type}" 0 1 float_char)

  GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})

  # The four gemm copy routines are optional; each is built only when its
  # kernel variable is set, using the object name from the matching *OBJ variable.
  if (${float_char}GEMMINCOPY)
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type})
  endif ()
  if (${float_char}GEMMITCOPY)
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMITCOPY}" "${float_type}" "${${float_char}GEMMITCOPYOBJ}" false "" "" true ${float_type})
  endif ()
  if (${float_char}GEMMONCOPY)
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMONCOPY}" "${float_type}" "${${float_char}GEMMONCOPYOBJ}" false "" "" true ${float_type})
  endif ()
  if (${float_char}GEMMOTCOPY)
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMOTCOPY}" "${float_type}" "${${float_char}GEMMOTCOPYOBJ}" false "" "" true ${float_type})
  endif ()

  GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_BETA}" "" "gemm_beta" false "" "" false ${float_type})

  # Without a dedicated TRMM kernel the GEMM kernel source is compiled again
  # for the trmm objects.
  if (USE_TRMM)
    set(TRMM_KERNEL "${${float_char}TRMMKERNEL}")
  else ()
    set(TRMM_KERNEL "${${float_char}GEMMKERNEL}")
  endif ()

  # The operand is quoted so it is compared as a string, not as a variable name.
  if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
    # just enumerate all these. there is an extra define for these indicating which side is a conjugate (e.g. CN NC NN) that I don't really want to work into GenerateCombinationObjects
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false ${float_type})

    # TRMMKERNEL is passed here just as the real-precision branch passes it,
    # so the trmm objects are not compiled as plain gemm kernels.
    GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;NN" "trmm_kernel_LN" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;NN" "trmm_kernel_LT" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;CONJ;CN" "trmm_kernel_LR" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;CONJ;CN" "trmm_kernel_LC" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;NN" "trmm_kernel_RN" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;NN" "trmm_kernel_RT" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;CONJ;NC" "trmm_kernel_RR" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;CONJ;NC" "trmm_kernel_RC" false "" "" false ${float_type})

    # Conjugated trsm kernels: same defines as the LN/LT/RN/RT objects below
    # plus CONJ (LC/RC follow LT/RT: no UPPER, and RT rather than RN).
    GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type})
    GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type})
  else ()
    GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type})
  endif ()

  GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false ${float_type})

  # These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define.
  # Could simplify it a bit by pairing up by -UUNIT/-DUNIT.
  # NOTE(review): every trmm/trsm copy below (inner and outer alike) is built
  # from the GEMM_UNROLL_M file variant, and every symm copy from the
  # GEMM_UNROLL_N variant - confirm this matches Makefile.L3.
  GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iunncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iutncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type})

  GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type})

  GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "" "symm_iutcopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type})
  GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER" "symm_iltcopy" false "" "" false ${float_type})

  # Default OMATCOPY kernels: when no kernel variable is defined for this
  # precision, fall back to the ../arm implementation (the "z"-prefixed file
  # for the C and Z precisions).
  if ("${float_char}" STREQUAL "Z" OR "${float_char}" STREQUAL "C")
    set(omatcopy_file_prefix "z")
  else ()
    set(omatcopy_file_prefix "")
  endif ()
  foreach (omatcopy_variant CN RN CT RT)
    if (NOT DEFINED ${float_char}OMATCOPY_${omatcopy_variant})
      string(TOLOWER "${omatcopy_variant}" omatcopy_variant_lower)
      set(${float_char}OMATCOPY_${omatcopy_variant} ../arm/${omatcopy_file_prefix}omatcopy_${omatcopy_variant_lower}.c)
    endif ()
  endforeach ()

  # The object name is given without a precision letter, like every other name
  # in this file; a hard-coded leading "d" would end up in the names generated
  # for all precisions.
  # NOTE(review): assumes GenerateNamedObjects prefixes the name with the
  # precision letter - confirm in cmake/utils.cmake.
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type})
  GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type})
endforeach ()
# Makefile.LA
# The LAPACK helper objects from Makefile.LA appear not to be ported yet; the
# corresponding Makefile line is kept below for reference.
#DBLASOBJS += dneg_tcopy$(TSUFFIX).$(SUFFIX) dlaswp_ncopy$(TSUFFIX).$(SUFFIX)
# Bundle the kernel sources collected in OPENBLAS_SRC into an OBJECT library
# named "kernel".
# NOTE(review): OPENBLAS_SRC is presumably filled by the Generate*Objects calls
# above - confirm in cmake/utils.cmake.
add_library(kernel OBJECT ${OPENBLAS_SRC})

View File

@ -35,9 +35,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**************************************************************************************/
#include "common.h"
#include <complex.h>
#ifndef _MSC_VER
#include <complex.h>
FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#else
openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#endif
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;

98
lapack/CMakeLists.txt Normal file
View File

@ -0,0 +1,98 @@
# LAPACK subset shipped with OpenBLAS: source lists grouped by how their
# object names and defines have to be generated.
include_directories(${CMAKE_SOURCE_DIR})

# Built as-is for every precision.
set(LAPACK_SOURCES
  getrf/getrf_single.c
  potrf/potrf_U_single.c
  potrf/potrf_L_single.c
  lauum/lauum_U_single.c
  lauum/lauum_L_single.c
)

# The complex build uses a file with a 'z' added to the filename.
set(LAPACK_MANGLED_SOURCES
  getf2/getf2_k.c
  lauu2/lauu2_U.c
  lauu2/lauu2_L.c
  potf2/potf2_U.c
  potf2/potf2_L.c
)

# Sources that need TRANS set; a 'z' version of the file exists.
set(TRANS_SOURCES
  getrs/getrs_single.c
)

# Sources that need UNIT set; these do NOT have a 'z' version.
set(UNIT_SOURCES
  trtri/trtri_U_single.c
  trtri/trtri_L_single.c
)

# Sources that need UNIT set and DO have a 'z' version.
set(UNIT_SOURCES2
  trti2/trti2_U.c
  trti2/trti2_L.c
)

GenerateNamedObjects("${LAPACK_SOURCES}")
GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" false "" "" false 3)

# TODO: laswp needs arch specific code
# Until then the generic laswp is compiled twice: once without extra defines
# and once with MINUS.
GenerateNamedObjects("laswp/generic/laswp_k.c" "" "laswp_plus" false "" "" false 3)
GenerateNamedObjects("laswp/generic/laswp_k.c" "MINUS" "laswp_minus" false "" "" false 3)
# Threaded builds add the *_parallel.c variants next to the single-threaded ones.
if (SMP)
  # getrf has a separate OpenMP implementation.
  if (USE_OPENMP)
    set(GETRF_SRC getrf/getrf_parallel_omp.c)
  else ()
    set(GETRF_SRC getrf/getrf_parallel.c)
  endif ()

  # None of these has a 'z' version.
  set(PARALLEL_SOURCES
    ${GETRF_SRC}
    lauum/lauum_U_parallel.c
    lauum/lauum_L_parallel.c
    potrf/potrf_U_parallel.c
    potrf/potrf_L_parallel.c
  )

  # Has a 'z' version; handled with the other TRANS sources further down.
  list(APPEND TRANS_SOURCES
    getrs/getrs_parallel.c
  )

  # No 'z' version; handled with the other UNIT sources further down.
  list(APPEND UNIT_SOURCES
    trtri/trtri_U_parallel.c
    trtri/trtri_L_parallel.c
  )

  GenerateNamedObjects("${PARALLEL_SOURCES}")
endif ()
# TRANS sources: real precisions go through GenerateCombinationObjects, while
# complex precisions use the 'z'-prefixed file and are enumerated explicitly
# with TRANS=1..4 (object name infixes N, T, R, C).
foreach (float_type ${FLOAT_TYPES})
  # The operand is quoted so it is compared as a string, not as a variable name.
  if ("${float_type}" STREQUAL "COMPLEX" OR "${float_type}" STREQUAL "ZCOMPLEX")
    foreach (trans_src ${TRANS_SOURCES})
      # Split e.g. "getrs/getrs_single.c" into "getrs_" and "single" so the
      # TRANS letter can be inserted between them.
      string(REGEX MATCH "[a-z]/([a-z]+_)([a-z]+)" op_name "${trans_src}")
      # Copy the captures right away so later regex use cannot overwrite them.
      set(op_prefix "${CMAKE_MATCH_1}")
      set(op_suffix "${CMAKE_MATCH_2}")
      # The complex source lives next to the real one with a 'z' prefix.
      string(REPLACE "/" "/z" ztrans_src "${trans_src}")
      GenerateNamedObjects("${ztrans_src}" "TRANS=1" "${op_prefix}N_${op_suffix}" false "" "" false ${float_type})
      GenerateNamedObjects("${ztrans_src}" "TRANS=2" "${op_prefix}T_${op_suffix}" false "" "" false ${float_type})
      GenerateNamedObjects("${ztrans_src}" "TRANS=3" "${op_prefix}R_${op_suffix}" false "" "" false ${float_type})
      GenerateNamedObjects("${ztrans_src}" "TRANS=4" "${op_prefix}C_${op_suffix}" false "" "" false ${float_type})
    endforeach ()
  else ()
    GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4 "" false ${float_type})
  endif ()
endforeach ()

# UNIT sources: built with and without UNIT.
GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "" 4)
GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0 "" "" 3)

# Bundle the LAPACK sources collected in OPENBLAS_SRC into an OBJECT library
# named "lapack".
# NOTE(review): OPENBLAS_SRC is presumably filled by the Generate*Objects calls
# above - confirm in cmake/utils.cmake.
add_library(lapack OBJECT ${OPENBLAS_SRC})

View File

@ -67,7 +67,7 @@ double sqrt(double);
#undef GETRF_FACTOR
#define GETRF_FACTOR 1.00
static inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) {
static __inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) {
double m = (double)(M - IS - BK);
double n = (double)(N - IS - BK);
@ -373,7 +373,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
BLASLONG num_cpu;
#ifdef _MSC_VER
BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE];
#else
volatile BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128)));
#endif
#ifndef COMPLEX
#ifdef XDOUBLE

View File

@ -43,7 +43,7 @@
#if !defined(XDOUBLE) || !defined(QUAD_PRECISION)
static inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -141,7 +141,7 @@ static inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
}
static inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -232,7 +232,7 @@ static inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
static inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -362,7 +362,7 @@ static inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
}
static inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -486,7 +486,7 @@ static inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
}
static inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -613,7 +613,7 @@ static inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
}
static inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -735,7 +735,7 @@ static inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
static inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -862,7 +862,7 @@ static inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
}
static inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -984,7 +984,7 @@ static inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
static inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -1082,7 +1082,7 @@ static inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
}
static inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -1180,7 +1180,7 @@ static inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
}
static inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -1270,7 +1270,7 @@ static inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
}
static inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -1360,7 +1360,7 @@ static inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
}
static inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -1490,7 +1490,7 @@ static inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
}
static inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -1620,7 +1620,7 @@ static inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
}
static inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;
@ -1744,7 +1744,7 @@ static inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
}
}
static inline void ZTRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
static __inline void ZTRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){
BLASLONG is, js;
FLOAT *aa1, *aa2;