diff --git a/.gitignore b/.gitignore index 8df228993..2c298e3b4 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,5 @@ test/sblat3 test/zblat1 test/zblat2 test/zblat3 +build +build.* diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..3b436dc13 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,190 @@ +## +## Author: Hank Anderson +## + +cmake_minimum_required(VERSION 2.8.4) +project(OpenBLAS) +set(OpenBLAS_MAJOR_VERSION 0) +set(OpenBLAS_MINOR_VERSION 2) +set(OpenBLAS_PATCH_VERSION 14) +set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") + +enable_language(ASM) +enable_language(C) + +if(MSVC) +set(OpenBLAS_LIBNAME libopenblas) +else() +set(OpenBLAS_LIBNAME openblas) +endif() + +####### +if(MSVC) +option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON) +endif() +option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF) +option(BUILD_DEBUG "Build Debug Version" OFF) +####### +if(BUILD_WITHOUT_LAPACK) +set(NO_LAPACK 1) +set(NO_LAPACKE 1) +endif() + +if(BUILD_DEBUG) +set(CMAKE_BUILD_TYPE Debug) +else() +set(CMAKE_BUILD_TYPE Release) +endif() + +if(BUILD_WITHOUT_CBLAS) +set(NO_CBLAS 1) +endif() + +####### + + +message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. 
Only x86 support is currently available.") + +include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake") +include("${CMAKE_SOURCE_DIR}/cmake/system.cmake") + +set(BLASDIRS interface driver/level2 driver/level3 driver/others) + +if (NOT DYNAMIC_ARCH) + list(APPEND BLASDIRS kernel) +endif () + +if (DEFINED UTEST_CHECK) + set(SANITY_CHECK 1) +endif () + +if (DEFINED SANITY_CHECK) + list(APPEND BLASDIRS reference) +endif () + +set(SUBDIRS ${BLASDIRS}) +if (NOT NO_LAPACK) + list(APPEND SUBDIRS lapack) +endif () + +# set which float types we want to build for +if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16) + # if none are defined, build for all + set(BUILD_SINGLE true) + set(BUILD_DOUBLE true) + set(BUILD_COMPLEX true) + set(BUILD_COMPLEX16 true) +endif () + +set(FLOAT_TYPES "") +if (BUILD_SINGLE) + message(STATUS "Building Single Precision") + list(APPEND FLOAT_TYPES "SINGLE") # defines nothing +endif () + +if (BUILD_DOUBLE) + message(STATUS "Building Double Precision") + list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE +endif () + +if (BUILD_COMPLEX) + message(STATUS "Building Complex Precision") + list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX +endif () + +if (BUILD_COMPLEX16) + message(STATUS "Building Double Complex Precision") + list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE +endif () + +set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench) + +# all :: libs netlib tests shared + +# libs : +if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") + message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. 
Please read README for details.") +endif () + +if (NO_STATIC AND NO_SHARED) # bare names: an unset variable is simply false instead of leaving an empty operand + message(FATAL_ERROR "Neither static nor shared are enabled.") +endif () + +# get obj vars into format that add_library likes: $<TARGET_OBJECTS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html) +set(TARGET_OBJS "") +foreach (SUBDIR ${SUBDIRS}) + add_subdirectory(${SUBDIR}) + string(REPLACE "/" "_" subdir_obj ${SUBDIR}) + list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${subdir_obj}>") # one object-library reference per subdirectory +endforeach () + +# netlib: + +# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. +# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. +if (NOT NOFORTRAN AND NOT NO_LAPACK) + include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake") +if (NOT NO_LAPACKE) + include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake") +endif () +endif () + +#Only generate .def for dll on MSVC +if(MSVC) +set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def") +endif() + +# add objects to the openblas lib +add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) + +include("${CMAKE_SOURCE_DIR}/cmake/export.cmake") + + +if(NOT MSVC) +#only build shared library for MSVC +add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS}) +set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME}) +set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1) + +if(SMP) +target_link_libraries(${OpenBLAS_LIBNAME} pthread) +target_link_libraries(${OpenBLAS_LIBNAME}_static pthread) +endif() + +#build test and ctest +enable_testing() +add_subdirectory(test) +if(NOT NO_CBLAS) +add_subdirectory(ctest) +endif() +endif() + +set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES + VERSION 
${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION} + SOVERSION ${OpenBLAS_MAJOR_VERSION} +) + + +# TODO: Why is the config saved here? Is this necessary with CMake? +#Save the config files for installation +# @cp Makefile.conf Makefile.conf_last +# @cp config.h config_last.h +#ifdef QUAD_PRECISION +# @echo "#define QUAD_PRECISION">> config_last.h +#endif +#ifeq ($(EXPRECISION), 1) +# @echo "#define EXPRECISION">> config_last.h +#endif +### +#ifeq ($(DYNAMIC_ARCH), 1) +# @$(MAKE) -C kernel commonlibs || exit 1 +# @for d in $(DYNAMIC_CORE) ; \ +# do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ +# done +# @echo DYNAMIC_ARCH=1 >> Makefile.conf_last +#endif +#ifdef USE_THREAD +# @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last +#endif +# @touch lib.grd + diff --git a/Changelog.txt b/Changelog.txt index 6941a9f96..422b8b519 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,57 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.2.15 +27-Oct-2015 +common: + * Support cmake on x86/x86-64. Natively compiling on MS Visual Studio. + (experimental. Thank Hank Anderson for the initial cmake porting work.) + + On Linux and Mac OSX, OpenBLAS cmake supports assembly kernels. + e.g. cmake . + make + make test (Optional) + + On Windows MS Visual Studio, OpenBLAS cmake only support C kernels. + (OpenBLAS uses AT&T style assembly, which is not supported by MSVC.) + e.g. cmake -G "Visual Studio 12 Win64" . + Open OpenBLAS.sln and build. + + * Enable MAX_STACK_ALLOC flags by default. + Improve ger and gemv for small matrices. + * Improve gemv parallel with small m and large n case. + * Improve ?imatcopy when lda==ldb (#633. Thanks, Martin Koehler) + * Add vecLib benchmarks (#565. Thanks, Andreas Noack.) + * Fix LAPACK lantr for row major matrices (#634. Thanks, Dan Kortschak) + * Fix LAPACKE lansy (#640. 
Thanks, Dan Kortschak) + * Import bug fixes for LAPACKE s/dormlq, c/zunmlq + * Raise the signal when pthread_create fails (#668. Thanks, James K. Lowden) + * Remove g77 from compiler list. + * Enable AppVeyor Windows CI. + +x86/x86-64: + * Support pure C generic kernels for x86/x86-64. + * Support Intel Broadwell and Skylake by Haswell kernels. + * Support AMD Excavator by Steamroller kernels. + * Optimize s/d/c/zdot for Intel SandyBridge and Haswell. + * Optimize s/d/c/zdot for AMD Piledriver and Steamroller. + * Optimize s/d/c/zaxpy for Intel SandyBridge and Haswell. + * Optimize s/d/c/zaxpy for AMD Piledriver and Steamroller. + * Optimize d/c/zscal for Intel Haswell, dscal for Intel SandyBridge. + * Optimize d/c/zscal for AMD Bulldozer, Piledriver and Steamroller. + * Optimize s/dger for Intel SandyBridge. + * Optimize s/dsymv for Intel SandyBridge. + * Optimize ssymv for Intel Haswell. + * Optimize dgemv for Intel Nehalem and Haswell. + * Optimize dtrmm for Intel Haswell. + +ARM: + * Support Android NDK armeabi-v7a-hard ABI (-mfloat-abi=hard) + e.g. make HOSTCC=gcc CC=arm-linux-androideabi-gcc NO_LAPACK=1 TARGET=ARMV7 + * Fix lock, rpcc bugs (#616, #617. Thanks, Grazvydas Ignotas) +POWER: + * Support ppc64le platform (ELF ABI v2. #612. Thanks, Matthew Brandyberry.) + * Support POWER7/8 by POWER6 kernels. (#612. Thanks, Fábio Perez.) + ==================================================================== Version 0.2.14 24-Mar-2015 diff --git a/Makefile.rule b/Makefile.rule index 19f3fe3d9..459f79c26 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.2.14 +VERSION = 0.2.15 # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library @@ -169,6 +169,9 @@ COMMON_PROF = -pg # 64 bit integer interfaces in OpenBLAS. 
# For details, https://github.com/xianyi/OpenBLAS/pull/459 # +# The same prefix and suffix are also added to the library name, +# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas +# # SYMBOLPREFIX= # SYMBOLSUFFIX= diff --git a/Makefile.system b/Makefile.system index 7a71b1d03..de086acad 100644 --- a/Makefile.system +++ b/Makefile.system @@ -891,12 +891,6 @@ ifdef USE_SIMPLE_THREADED_LEVEL3 CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3 endif -ifndef LIBNAMESUFFIX -LIBPREFIX = libopenblas -else -LIBPREFIX = libopenblas_$(LIBNAMESUFFIX) -endif - ifndef SYMBOLPREFIX SYMBOLPREFIX = endif @@ -905,6 +899,12 @@ ifndef SYMBOLSUFFIX SYMBOLSUFFIX = endif +ifndef LIBNAMESUFFIX +LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) +else +LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX) +endif + KERNELDIR = $(TOPDIR)/kernel/$(ARCH) include $(TOPDIR)/Makefile.$(ARCH) diff --git a/README.md b/README.md index 16f874078..0ec86d362 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,9 @@ [![Join the chat at https://gitter.im/xianyi/OpenBLAS](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -[![Build Status](https://travis-ci.org/xianyi/OpenBLAS.png?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS) +Travis CI: [![Build Status](https://travis-ci.org/xianyi/OpenBLAS.png?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS) +AppVeyor: [![Build status](https://ci.appveyor.com/api/projects/status/09sohd35n8nkkx64/branch/develop?svg=true)](https://ci.appveyor.com/project/xianyi/openblas/branch/develop) ## Introduction OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. 
diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 000000000..394e48854 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,42 @@ +version: 0.2.15.{build} + +#environment: + +platform: + - x64 + +configuration: Release + +clone_folder: c:\projects\OpenBLAS + +init: + - git config --global core.autocrlf input + +build: + project: OpenBLAS.sln + +clone_depth: 5 + +#branches to build +branches: + only: + - master + - develop + - cmake + +skip_tags: true + +matrix: + fast_finish: true + +skip_commits: +# Add [av skip] to commit messages + message: /\[av skip\]/ + +before_build: + - echo Running cmake... + - cd c:\projects\OpenBLAS + - cmake -G "Visual Studio 12 Win64" . + +test_script: + - echo Build OK! diff --git a/c_check b/c_check index 0fdadb659..d694e7411 100644 --- a/c_check +++ b/c_check @@ -30,7 +30,7 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) { $cross_suffix = $1; } } else { - if ($ARGV[0] =~ /(.*-)(.*)/) { + if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) { $cross_suffix = $1; } } diff --git a/cmake/arch.cmake b/cmake/arch.cmake new file mode 100644 index 000000000..d6fa3ed5d --- /dev/null +++ b/cmake/arch.cmake @@ -0,0 +1,115 @@ +## +## Author: Hank Anderson +## Description: Ported from portion of OpenBLAS/Makefile.system +## Sets various variables based on architecture. + +if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64") + + if (${ARCH} STREQUAL "x86") + if (NOT BINARY) + set(NO_BINARY_MODE 1) + endif () + endif () + + if (NOT NO_EXPRECISION) + if (${F_COMPILER} MATCHES "GFORTRAN") + # N.B. 
I'm not sure if CMake differentiates between GCC and LSB -hpa + if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") + set(EXPRECISION 1) + set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double") + set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") + endif () + if (${CMAKE_C_COMPILER} STREQUAL "Clang") + set(EXPRECISION 1) + set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION") + set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") + endif () + endif () + endif () +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "Intel") + set(CCOMMON_OPT "${CCOMMON_OPT} -wd981") +endif () + +if (USE_OPENMP) + + if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") + set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "Clang") + message(WARNING "Clang doesn't support OpenMP yet.") + set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "Intel") + set(CCOMMON_OPT "${CCOMMON_OPT} -openmp") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "PGI") + set(CCOMMON_OPT "${CCOMMON_OPT} -mp") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") + set(CCOMMON_OPT "${CCOMMON_OPT} -mp") + set(CEXTRALIB "${CEXTRALIB} -lstdc++") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") + set(CCOMMON_OPT "${CCOMMON_OPT} -mp") + endif () +endif () + + +if (DYNAMIC_ARCH) + if (${ARCH} STREQUAL "x86") + set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO") + endif () + + if (${ARCH} STREQUAL "x86_64") + set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO") + if (NOT NO_AVX) + set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER") + endif () + if (NOT NO_AVX2) + set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL") + endif () + endif () + + if (NOT DYNAMIC_CORE) + unset(DYNAMIC_ARCH) + endif () 
+endif () + +if (${ARCH} STREQUAL "ia64") + set(NO_BINARY_MODE 1) + set(BINARY_DEFINED 1) + + if (${F_COMPILER} MATCHES "GFORTRAN") + if (${CMAKE_C_COMPILER} STREQUAL "GNU") + # EXPRECISION = 1 + # CCOMMON_OPT += -DEXPRECISION + endif () + endif () +endif () + +if (${ARCH} STREQUAL "mips64") + set(NO_BINARY_MODE 1) +endif () + +if (${ARCH} STREQUAL "alpha") + set(NO_BINARY_MODE 1) + set(BINARY_DEFINED 1) +endif () + +if (${ARCH} STREQUAL "arm") + set(NO_BINARY_MODE 1) + set(BINARY_DEFINED 1) +endif () + +if (${ARCH} STREQUAL "arm64") + set(NO_BINARY_MODE 1) + set(BINARY_DEFINED 1) +endif () + diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake new file mode 100644 index 000000000..89ec31446 --- /dev/null +++ b/cmake/c_check.cmake @@ -0,0 +1,89 @@ +## +## Author: Hank Anderson +## Description: Ported from the OpenBLAS/c_check perl script. +## This is triggered by prebuild.cmake and runs before any of the code is built. +## Creates config.h and Makefile.conf. + +# CMake vars set by this file: +# OSNAME (use CMAKE_SYSTEM_NAME) +# ARCH +# C_COMPILER (use CMAKE_C_COMPILER) +# BINARY32 +# BINARY64 +# FU +# CROSS_SUFFIX +# CROSS +# CEXTRALIB + +# Defines set by this file: +# OS_ +# ARCH_ +# C_ +# __32BIT__ +# __64BIT__ +# FUNDERSCORE +# PTHREAD_CREATE_FUNC + +# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables. 
+set(FU "") +if(APPLE) +set(FU "_") +elseif(MSVC) +set(FU "_") +elseif(UNIX) +set(FU "") +endif() + +# Convert CMake vars into the format that OpenBLAS expects +string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS) +if (${HOST_OS} STREQUAL "WINDOWS") + set(HOST_OS WINNT) +endif () + +# added by hpa - check size of void ptr to detect 64-bit compile +if (NOT DEFINED BINARY) + set(BINARY 32) + if (CMAKE_SIZEOF_VOID_P EQUAL 8) + set(BINARY 64) + endif () +endif () + +if (BINARY EQUAL 64) + set(BINARY64 1) +else () + set(BINARY32 1) +endif () + +# CMake docs define these: +# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for. +# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on. +# +# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check +set(ARCH ${CMAKE_SYSTEM_PROCESSOR}) +if (${ARCH} STREQUAL "AMD64") + set(ARCH "x86_64") +endif () + +# If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong +if (${ARCH} STREQUAL "x86_64" AND BINARY EQUAL 32) + set(ARCH x86) +endif () + +if (${ARCH} STREQUAL "X86") + set(ARCH x86) +endif () + +set(COMPILER_ID ${CMAKE_C_COMPILER_ID}) # feeds the C_<id> define below, so it must describe the C compiler, not the C++ one +if (${COMPILER_ID} STREQUAL "GNU") + set(COMPILER_ID "GCC") +endif () + +string(TOUPPER ${ARCH} UC_ARCH) + +file(WRITE ${TARGET_CONF} + "#define OS_${HOST_OS}\t1\n" + "#define ARCH_${UC_ARCH}\t1\n" + "#define C_${COMPILER_ID}\t1\n" + "#define __${BINARY}BIT__\t1\n" + "#define FUNDERSCORE\t${FU}\n") + diff --git a/cmake/cc.cmake b/cmake/cc.cmake new file mode 100644 index 000000000..de196524f --- /dev/null +++ b/cmake/cc.cmake @@ -0,0 +1,103 @@ +## +## Author: Hank Anderson +## Description: Ported from portion of OpenBLAS/Makefile.system +## Sets C related variables. 
+ +if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_C_COMPILER} STREQUAL "Clang") + + set(CCOMMON_OPT "${CCOMMON_OPT} -Wall") + set(COMMON_PROF "${COMMON_PROF} -fno-inline") + set(NO_UNINITIALIZED_WARN "-Wno-uninitialized") + + if (QUIET_MAKE) + set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused") + endif () + + if (NO_BINARY_MODE) + + if (${ARCH} STREQUAL "mips64") + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=n32") + endif () + set(BINARY_DEFINED 1) + endif () + + if (${CORE} STREQUAL "LOONGSON3A") + set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64") + set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64") + endif () + + if (${CORE} STREQUAL "LOONGSON3B") + set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64") + set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64") + endif () + + if (${OSNAME} STREQUAL "AIX") + set(BINARY_DEFINED 1) + endif () + endif () + + if (NOT BINARY_DEFINED) + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -m64") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -m32") + endif () + endif () +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "PGI") + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7") + endif () +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -m64") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -m32") + endif () +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") + + if (${ARCH} STREQUAL "mips64") + + if (NOT BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -n32") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -n64") + endif () + + if (${CORE} STREQUAL "LOONGSON3A") + set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static") + endif () + + if (${CORE} STREQUAL "LOONGSON3B") + set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static") + endif () + + else () + + if (NOT BINARY64) # 32-bit builds take -m32; the else branch handles 64-bit + set(CCOMMON_OPT "${CCOMMON_OPT} -m32") + else () + 
set(CCOMMON_OPT "${CCOMMON_OPT} -m64") + endif () + endif () +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "SUN") + set(CCOMMON_OPT "${CCOMMON_OPT} -w") + if (${ARCH} STREQUAL "x86") + set(CCOMMON_OPT "${CCOMMON_OPT} -m32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + endif () +endif () + diff --git a/cmake/export.cmake b/cmake/export.cmake new file mode 100644 index 000000000..adf59101f --- /dev/null +++ b/cmake/export.cmake @@ -0,0 +1,60 @@ + +#Only generate .def for dll on MSVC +if(MSVC) + +set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1) + +if (NOT DEFINED ARCH) + set(ARCH_IN "x86_64") +else() + set(ARCH_IN ${ARCH}) +endif() + +if (${CORE} STREQUAL "generic") + set(ARCH_IN "GENERIC") +endif () + +if (NOT DEFINED EXPRECISION) + set(EXPRECISION_IN 0) +else() + set(EXPRECISION_IN ${EXPRECISION}) +endif() + +if (NOT DEFINED NO_CBLAS) + set(NO_CBLAS_IN 0) +else() + set(NO_CBLAS_IN ${NO_CBLAS}) +endif() + +if (NOT DEFINED NO_LAPACK) + set(NO_LAPACK_IN 0) +else() + set(NO_LAPACK_IN ${NO_LAPACK}) +endif() + +if (NOT DEFINED NO_LAPACKE) + set(NO_LAPACKE_IN 0) +else() + set(NO_LAPACKE_IN ${NO_LAPACKE}) +endif() + +if (NOT DEFINED NEED2UNDERSCORES) + set(NEED2UNDERSCORES_IN 0) +else() + set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES}) +endif() + +if (NOT DEFINED ONLY_CBLAS) + set(ONLY_CBLAS_IN 0) +else() + set(ONLY_CBLAS_IN ${ONLY_CBLAS}) +endif() + +add_custom_command( + TARGET ${OpenBLAS_LIBNAME} PRE_LINK + COMMAND perl + ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def" + COMMENT "Create openblas.def file" + VERBATIM) + +endif() \ No newline at end of file diff --git a/cmake/f_check.cmake b/cmake/f_check.cmake new file mode 100644 index 000000000..e8fe4bfa7 --- /dev/null +++ b/cmake/f_check.cmake @@ -0,0 +1,66 @@ +## +## Author: 
Hank Anderson +## Copyright: (c) Stat-Ease, Inc. +## Created: 12/29/14 +## Last Modified: 12/29/14 +## Description: Ported from the OpenBLAS/f_check perl script. +## This is triggered by prebuild.cmake and runs before any of the code is built. +## Appends Fortran information to config.h and Makefile.conf. + +# CMake vars set by this file: +# F_COMPILER +# FC +# BU +# NOFORTRAN +# NEED2UNDERSCORES +# FEXTRALIB + +# Defines set by this file: +# BUNDERSCORE +# NEEDBUNDERSCORE +# NEED2UNDERSCORES + +if (MSVC) + # had to do this for MSVC, else CMake automatically assumes I have ifort... -hpa + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER(gfortran GNU) +endif () + +if (NOT NO_LAPACK) + enable_language(Fortran) +else() + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER(gfortran GNU) +endif() + +if (NOT ONLY_CBLAS) + # N.B. f_check is not cross-platform, so instead try to use CMake variables + # run f_check (appends to TARGET files) +# message(STATUS "Running f_check...") +# execute_process(COMMAND perl f_check ${TARGET_MAKE} ${TARGET_CONF} ${CMAKE_Fortran_COMPILER} +# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + + # TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile + # TODO: set FEXTRALIB flags a la f_check? 
+ + set(BU "_") + file(APPEND ${TARGET_CONF} + "#define BUNDERSCORE _\n" + "#define NEEDBUNDERSCORE 1\n" + "#define NEED2UNDERSCORES 0\n") + +else () + + #When we only build CBLAS, we set NOFORTRAN=2 + set(NOFORTRAN 2) + set(NO_FBLAS 1) + #set(F_COMPILER GFORTRAN) # CMake handles the fortran compiler + set(BU "_") + file(APPEND ${TARGET_CONF} + "#define BUNDERSCORE _\n" + "#define NEEDBUNDERSCORE 1\n") +endif() + +get_filename_component(F_COMPILER ${CMAKE_Fortran_COMPILER} NAME_WE) +string(TOUPPER ${F_COMPILER} F_COMPILER) + diff --git a/cmake/fc.cmake b/cmake/fc.cmake new file mode 100644 index 000000000..ba156c210 --- /dev/null +++ b/cmake/fc.cmake @@ -0,0 +1,200 @@ +## +## Author: Hank Anderson +## Description: Ported from portion of OpenBLAS/Makefile.system +## Sets Fortran related variables. + +if (${F_COMPILER} STREQUAL "G77") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77") + set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") + if (NOT NO_BINARY_MODE) + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + endif () + endif () +endif () + +if (${F_COMPILER} STREQUAL "G95") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95") + set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") + if (NOT NO_BINARY_MODE) + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + endif () + endif () +endif () + +if (${F_COMPILER} STREQUAL "GFORTRAN") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT") + set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") + #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc + if (NOT NO_LAPACK) + set(EXTRALIB "${EXTRALIB} -lgfortran") # append to the existing EXTRALIB value + endif () + if (NO_BINARY_MODE) + if (${ARCH} STREQUAL "mips64") + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32") + endif () + endif () + else () + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} 
-fdefault-integer-8") + endif () + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + endif () + endif () + + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "INTEL") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL") + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -i8") + endif () + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "FUJITSU") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU") + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "IBM") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM") + # FCOMMON_OPT += -qarch=440 + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -q64") + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -qintsize=8") + endif () + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -q32") + endif () + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "PGI") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI") + set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER") + if (BINARY64) + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -i8") + endif () + set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7-64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7") + endif () + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -mp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "PATHSCALE") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE") + if (BINARY64) + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -i8") + endif () + endif () + + if (NOT ${ARCH} STREQUAL "mips64") + if (NOT BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + endif () + else () + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32") + endif () + endif () + + if (USE_OPENMP) + set(FCOMMON_OPT 
"${FCOMMON_OPT} -mp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "OPEN64") + + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64") + if (BINARY64) + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -i8") + endif () + endif () + + if (${ARCH} STREQUAL "mips64") + + if (NOT BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -n32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -n64") + endif () + + if (${CORE} STREQUAL "LOONGSON3A") + set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static") + endif () + + if (${CORE} STREQUAL "LOONGSON3B") + set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static") + endif () + else () + if (NOT BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + endif () + endif () + + if (USE_OPENMP) + set(FEXTRALIB "${FEXTRALIB} -lstdc++") + set(FCOMMON_OPT "${FCOMMON_OPT} -mp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "SUN") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN") + if (${ARCH} STREQUAL "x86") + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + endif () + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -xopenmp=parallel") + endif () +endif () + +if (${F_COMPILER} STREQUAL "COMPAQ") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ") + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") + endif () +endif () + +# from the root Makefile - this is for lapack-netlib to compile the correct secnd file. +if (${F_COMPILER} STREQUAL "GFORTRAN") + set(TIMER "INT_ETIME") +else () + set(TIMER "NONE") +endif () + diff --git a/cmake/kernel.cmake b/cmake/kernel.cmake new file mode 100644 index 000000000..fad84de51 --- /dev/null +++ b/cmake/kernel.cmake @@ -0,0 +1,165 @@ +# helper functions for the kernel CMakeLists.txt + + +# Set the default filenames for L1 objects. Most of these will be overriden by the appropriate KERNEL file. 
# SetDefaultL1()
#
# Assigns a default source file to each *KERNEL variable listed below.
# This is a macro, so every set() takes effect in the caller's scope.
# Recurring pattern: the S/D/Q-prefixed variables share one source file and
# the C/Z/X-prefixed variables share its z-prefixed counterpart.
macro(SetDefaultL1)
  # amax
  set(SAMAXKERNEL amax.S)
  set(DAMAXKERNEL amax.S)
  set(QAMAXKERNEL amax.S)
  set(CAMAXKERNEL zamax.S)
  set(ZAMAXKERNEL zamax.S)
  set(XAMAXKERNEL zamax.S)

  # amin
  set(SAMINKERNEL amin.S)
  set(DAMINKERNEL amin.S)
  set(QAMINKERNEL amin.S)
  set(CAMINKERNEL zamin.S)
  set(ZAMINKERNEL zamin.S)
  set(XAMINKERNEL zamin.S)

  # max / min (S/D/Q only)
  set(SMAXKERNEL max.S)
  set(DMAXKERNEL max.S)
  set(QMAXKERNEL max.S)
  set(SMINKERNEL min.S)
  set(DMINKERNEL min.S)
  set(QMINKERNEL min.S)

  # iamax
  set(ISAMAXKERNEL iamax.S)
  set(IDAMAXKERNEL iamax.S)
  set(IQAMAXKERNEL iamax.S)
  set(ICAMAXKERNEL izamax.S)
  set(IZAMAXKERNEL izamax.S)
  set(IXAMAXKERNEL izamax.S)

  # iamin
  set(ISAMINKERNEL iamin.S)
  set(IDAMINKERNEL iamin.S)
  set(IQAMINKERNEL iamin.S)
  set(ICAMINKERNEL izamin.S)
  set(IZAMINKERNEL izamin.S)
  set(IXAMINKERNEL izamin.S)

  # imax / imin point at the same sources as iamax / iamin
  set(ISMAXKERNEL iamax.S)
  set(IDMAXKERNEL iamax.S)
  set(IQMAXKERNEL iamax.S)
  set(ISMINKERNEL iamin.S)
  set(IDMINKERNEL iamin.S)
  set(IQMINKERNEL iamin.S)

  # asum
  set(SASUMKERNEL asum.S)
  set(DASUMKERNEL asum.S)
  set(QASUMKERNEL asum.S)
  set(CASUMKERNEL zasum.S)
  set(ZASUMKERNEL zasum.S)
  set(XASUMKERNEL zasum.S)

  # axpy
  set(SAXPYKERNEL axpy.S)
  set(DAXPYKERNEL axpy.S)
  set(QAXPYKERNEL axpy.S)
  set(CAXPYKERNEL zaxpy.S)
  set(ZAXPYKERNEL zaxpy.S)
  set(XAXPYKERNEL zaxpy.S)

  # copy
  set(SCOPYKERNEL copy.S)
  set(DCOPYKERNEL copy.S)
  set(QCOPYKERNEL copy.S)
  set(CCOPYKERNEL zcopy.S)
  set(ZCOPYKERNEL zcopy.S)
  set(XCOPYKERNEL zcopy.S)

  # dot
  set(SDOTKERNEL dot.S)
  set(DDOTKERNEL dot.S)
  set(QDOTKERNEL dot.S)
  set(CDOTKERNEL zdot.S)
  set(ZDOTKERNEL zdot.S)
  set(XDOTKERNEL zdot.S)

  # nrm2
  set(SNRM2KERNEL nrm2.S)
  set(DNRM2KERNEL nrm2.S)
  set(QNRM2KERNEL nrm2.S)
  set(CNRM2KERNEL znrm2.S)
  set(ZNRM2KERNEL znrm2.S)
  set(XNRM2KERNEL znrm2.S)

  # rot
  set(SROTKERNEL rot.S)
  set(DROTKERNEL rot.S)
  set(QROTKERNEL rot.S)
  set(CROTKERNEL zrot.S)
  set(ZROTKERNEL zrot.S)
  set(XROTKERNEL zrot.S)

  # scal
  set(SSCALKERNEL scal.S)
  set(DSCALKERNEL scal.S)
  set(QSCALKERNEL scal.S)
  set(CSCALKERNEL zscal.S)
  set(ZSCALKERNEL zscal.S)
  set(XSCALKERNEL zscal.S)

  # swap
  set(SSWAPKERNEL swap.S)
  set(DSWAPKERNEL swap.S)
  set(QSWAPKERNEL swap.S)
  set(CSWAPKERNEL zswap.S)
  set(ZSWAPKERNEL zswap.S)
  set(XSWAPKERNEL zswap.S)

  # gemv, N variant (SetDefaultL2 assigns these same values again)
  set(SGEMVNKERNEL gemv_n.S)
  set(DGEMVNKERNEL gemv_n.S)
  set(QGEMVNKERNEL gemv_n.S)
  set(CGEMVNKERNEL zgemv_n.S)
  set(ZGEMVNKERNEL zgemv_n.S)
  set(XGEMVNKERNEL zgemv_n.S)

  # gemv, T variant (SetDefaultL2 assigns these same values again)
  set(SGEMVTKERNEL gemv_t.S)
  set(DGEMVTKERNEL gemv_t.S)
  set(QGEMVTKERNEL gemv_t.S)
  set(CGEMVTKERNEL zgemv_t.S)
  set(ZGEMVTKERNEL zgemv_t.S)
  set(XGEMVTKERNEL zgemv_t.S)

  # C sources referenced by relative path rather than a bare .S file name
  set(SCABS_KERNEL ../generic/cabs.c)
  set(DCABS_KERNEL ../generic/cabs.c)
  set(QCABS_KERNEL ../generic/cabs.c)
  set(LSAME_KERNEL ../generic/lsame.c)

  # axpby defaults point into ../arm, not ../generic
  set(SAXPBYKERNEL ../arm/axpby.c)
  set(DAXPBYKERNEL ../arm/axpby.c)
  set(CAXPBYKERNEL ../arm/zaxpby.c)
  set(ZAXPBYKERNEL ../arm/zaxpby.c)
endmacro ()

# SetDefaultL2()
#
# Assigns default sources for the GEMV, GER, SYMV and HEMV kernel variables.
# The GEMV values are identical to the ones SetDefaultL1 assigns.
# Like SetDefaultL1, this is a macro and writes into the caller's scope.
macro(SetDefaultL2)
  # gemv, N variant
  set(SGEMVNKERNEL gemv_n.S)
  set(DGEMVNKERNEL gemv_n.S)
  set(QGEMVNKERNEL gemv_n.S)
  set(CGEMVNKERNEL zgemv_n.S)
  set(ZGEMVNKERNEL zgemv_n.S)
  set(XGEMVNKERNEL zgemv_n.S)

  # gemv, T variant
  set(SGEMVTKERNEL gemv_t.S)
  set(DGEMVTKERNEL gemv_t.S)
  set(QGEMVTKERNEL gemv_t.S)
  set(CGEMVTKERNEL zgemv_t.S)
  set(ZGEMVTKERNEL zgemv_t.S)
  set(XGEMVTKERNEL zgemv_t.S)

  # ger: S/D/Q have a single variable, C/Z/X have a U and a C variable each
  set(SGERKERNEL ../generic/ger.c)
  set(DGERKERNEL ../generic/ger.c)
  set(QGERKERNEL ../generic/ger.c)
  set(CGERUKERNEL ../generic/zger.c)
  set(CGERCKERNEL ../generic/zger.c)
  set(ZGERUKERNEL ../generic/zger.c)
  set(ZGERCKERNEL ../generic/zger.c)
  set(XGERUKERNEL ../generic/zger.c)
  set(XGERCKERNEL ../generic/zger.c)

  # symv: U and L variables share one source per prefix family
  set(SSYMV_U_KERNEL ../generic/symv_k.c)
  set(SSYMV_L_KERNEL ../generic/symv_k.c)
  set(DSYMV_U_KERNEL ../generic/symv_k.c)
  set(DSYMV_L_KERNEL ../generic/symv_k.c)
  set(QSYMV_U_KERNEL ../generic/symv_k.c)
  set(QSYMV_L_KERNEL ../generic/symv_k.c)
  set(CSYMV_U_KERNEL ../generic/zsymv_k.c)
  set(CSYMV_L_KERNEL ../generic/zsymv_k.c)
  set(ZSYMV_U_KERNEL ../generic/zsymv_k.c)
  set(ZSYMV_L_KERNEL ../generic/zsymv_k.c)
  set(XSYMV_U_KERNEL ../generic/zsymv_k.c)
  set(XSYMV_L_KERNEL ../generic/zsymv_k.c)

  # hemv (C/Z/X only): the U, L, V and M variables all use the same source
  set(CHEMV_U_KERNEL ../generic/zhemv_k.c)
  set(CHEMV_L_KERNEL ../generic/zhemv_k.c)
  set(CHEMV_V_KERNEL ../generic/zhemv_k.c)
  set(CHEMV_M_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_U_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_L_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_V_KERNEL ../generic/zhemv_k.c)
  set(ZHEMV_M_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_U_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
  set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
endmacro ()

# SetDefaultL3()
#
# Assigns default sources for the four GEADD kernel variables (S, D, C, Z).
# Macro: the assignments are made in the caller's scope.
macro(SetDefaultL3)
  set(SGEADD_KERNEL ../generic/geadd.c)
  set(DGEADD_KERNEL ../generic/geadd.c)
  set(CGEADD_KERNEL ../generic/zgeadd.c)
  set(ZGEADD_KERNEL ../generic/zgeadd.c)
endmacro ()
\ No newline at end of file diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake new file mode 100644 index 000000000..3e81611ab --- /dev/null +++ b/cmake/lapack.cmake @@ -0,0 +1,347 @@ +# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.
+ +set(ALLAUX + ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f + ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f + ../INSTALL/ilaver.f ../INSTALL/slamch.f +) + +set(SCLAUX + sbdsdc.f + sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f + slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f + slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f + slagts.f slamrg.f slanst.f + slapy2.f slapy3.f slarnv.f + slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f + slarrk.f slarrr.f slaneg.f + slartg.f slaruv.f slas2.f slascl.f + slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f + slasd7.f slasd8.f slasda.f slasdq.f slasdt.f + slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f + slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f + ssteqr.f ssterf.f slaisnan.f sisnan.f + slartgp.f slartgs.f + ../INSTALL/second_${TIMER}.f +) + +set(DZLAUX + dbdsdc.f + dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f + dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f + dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f + dlagts.f dlamrg.f dlanst.f + dlapy2.f dlapy3.f dlarnv.f + dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f + dlarrk.f dlarrr.f dlaneg.f + dlartg.f dlaruv.f dlas2.f dlascl.f + dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f + dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f + dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f + dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f + dsteqr.f dsterf.f dlaisnan.f disnan.f + dlartgp.f dlartgs.f + ../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f +) + +set(SLASRC + sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f + sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f + sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f + sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f + sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f + sgeqp3.f sgeqpf.f sgeqr2.f 
sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f + sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f + sgetc2.f sgetri.f + sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f + sggglm.f sgghrd.f sgglse.f sggqrf.f + sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f + sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f + shsein.f shseqr.f slabrd.f slacon.f slacn2.f + slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f + slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f + slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f + slansy.f slantb.f slantp.f slantr.f slanv2.f + slapll.f slapmt.f + slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f + slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f + slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f + slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f + slarrv.f slartv.f + slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f + slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f + sopgtr.f sopmtr.f sorg2l.f sorg2r.f + sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f + sorgrq.f sorgtr.f sorm2l.f sorm2r.f + sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f + sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f + spbstf.f spbsv.f spbsvx.f + spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f + sposvx.f spstrf.f spstf2.f + sppcon.f sppequ.f + spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f + spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f + ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f + ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f + sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f + ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f + sstevx.f + ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f + ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f + ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f + ssyswapr.f ssytrs.f 
ssytrs2.f ssyconv.f + ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f + ssytri_rook.f ssycon_rook.f ssysv_rook.f + stbcon.f + stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f + stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f + stptrs.f + strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f + strtrs.f stzrqf.f stzrzf.f sstemr.f + slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f + stfttr.f stpttf.f stpttr.f strttf.f strttp.f + sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f + sgeequb.f ssyequb.f spoequb.f sgbequb.f + sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f + sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f + sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f + stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f +) + +set(DSLASRC spotrs.f) + +set(CLASRC + cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f + cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f + cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f + cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f + cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f + cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f + cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f + cgesvx.f cgetc2.f cgetri.f + cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f + cgghrd.f cgglse.f cggqrf.f cggrqf.f + cggsvd.f cggsvp.f + cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f + chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f + checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f + chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f + chetf2.f chetrd.f + chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f + chetrs.f chetrs2.f + chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f + chgeqz.f chpcon.f chpev.f chpevd.f + chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f + chpsvx.f + chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f + clacgv.f clacon.f 
clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f + claed0.f claed7.f claed8.f + claein.f claesy.f claev2.f clags2.f clagtm.f + clahef.f clahef_rook.f clahqr.f + clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f + clanhb.f clanhe.f + clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f + clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f + claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f + claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f + claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f + clarf.f clarfb.f clarfg.f clarft.f clarfgp.f + clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f + clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f + clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f + clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f + cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f + cposv.f cposvx.f cpstrf.f cpstf2.f + cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f + cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f + crot.f cspcon.f csprfs.f cspsv.f + cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f + cstegr.f cstein.f csteqr.f + csycon.f + csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f + csyswapr.f csytrs.f csytrs2.f csyconv.f + csytf2_rook.f csytrf_rook.f csytrs_rook.f + csytri_rook.f csycon_rook.f csysv_rook.f + ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f + ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f + ctprfs.f ctptri.f + ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f + ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f + cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f + cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f + cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f + cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f + chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f 
cpftrs.f ctfsm.f ctftri.f + ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f + cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f + cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f + cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f + cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f + ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f +) + +set(ZCLASRC cpotrs.f) + +set(DLASRC + dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f + dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f + dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f + dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f + dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f + dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f + dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f + dgetc2.f dgetri.f + dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f + dggglm.f dgghrd.f dgglse.f dggqrf.f + dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f + dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f + dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f + dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f + dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f + dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f + dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f + dlapll.f dlapmt.f + dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f + dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f + dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f + dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f + dlargv.f dlarrv.f dlartv.f + dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f + dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f + dopgtr.f dopmtr.f dorg2l.f dorg2r.f + dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f + dorgrq.f dorgtr.f dorm2l.f dorm2r.f + dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f + dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f + dpbstf.f dpbsv.f dpbsvx.f + dpbtf2.f dpbtrf.f 
dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f + dposvx.f dpotrs.f dpstrf.f dpstf2.f + dppcon.f dppequ.f + dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f + dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f + dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f + dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f + dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f + dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f + dstevx.f + dsycon.f dsyev.f dsyevd.f dsyevr.f + dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f + dsysv.f dsysvx.f + dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f + dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f + dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f + dsytri_rook.f dsycon_rook.f dsysv_rook.f + dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f + dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f + dtptrs.f + dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f + dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f + dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f + dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f + dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f + dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f + dgeequb.f dsyequb.f dpoequb.f dgbequb.f + dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f + dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f + dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f + dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f +) + +set(ZLASRC + zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f + zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f + zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f + zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f + zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f + zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f + zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f + zgetri.f + zggbak.f zggbal.f zgges.f 
zggesx.f zggev.f zggevx.f zggglm.f + zgghrd.f zgglse.f zggqrf.f zggrqf.f + zggsvd.f zggsvp.f + zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f + zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f + zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f + zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f + zhetf2.f zhetrd.f + zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f + zhetrs.f zhetrs2.f + zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f + zhgeqz.f zhpcon.f zhpev.f zhpevd.f + zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f + zhpsvx.f + zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f + zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f + zlaed0.f zlaed7.f zlaed8.f + zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f + zlahef.f zlahef_rook.f zlahqr.f + zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f + zlangt.f zlanhb.f + zlanhe.f + zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f + zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f + zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f + zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f + zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f + zlarcm.f zlarf.f zlarfb.f + zlarfg.f zlarft.f zlarfgp.f + zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f + zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f + zlassq.f zlasyf.f zlasyf_rook.f + zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f + zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f + zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f + zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f + zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f + zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f + zrot.f zspcon.f zsprfs.f zspsv.f + zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f + zstegr.f zstein.f zsteqr.f + zsycon.f + zsyrfs.f 
zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f + zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f + zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f + zsytri_rook.f zsycon_rook.f zsysv_rook.f + ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f + ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f + ztprfs.f ztptri.f + ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f + ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f + zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f + zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f + zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f + zunmtr.f zupgtr.f + zupmtr.f izmax1.f dzsum1.f zstemr.f + zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f + zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f + ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f + zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f + zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f + zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f + zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f + ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f +) + +set(LA_REL_SRC ${ALLAUX}) +if (BUILD_SINGLE) + list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX}) +endif () + +if (BUILD_DOUBLE) + list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX}) +endif () + +if (BUILD_COMPLEX) + list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX}) +endif () + +if (BUILD_COMPLEX16) + list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX}) +endif () + +# add lapack-netlib folder to the sources +set(LA_SOURCES "") +foreach (LA_FILE ${LA_REL_SRC}) + list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}") +endforeach () +set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}") diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake new file mode 100644 index 000000000..39ade0577 --- /dev/null +++ b/cmake/lapacke.cmake @@ -0,0 +1,2067 @@ + +set(C_SRC + lapacke_cbbcsd.c + lapacke_cbbcsd_work.c + 
lapacke_cbdsqr.c + lapacke_cbdsqr_work.c + lapacke_cgbbrd.c + lapacke_cgbbrd_work.c + lapacke_cgbcon.c + lapacke_cgbcon_work.c + lapacke_cgbequ.c + lapacke_cgbequ_work.c + lapacke_cgbequb.c + lapacke_cgbequb_work.c + lapacke_cgbrfs.c + lapacke_cgbrfs_work.c + lapacke_cgbsv.c + lapacke_cgbsv_work.c + lapacke_cgbsvx.c + lapacke_cgbsvx_work.c + lapacke_cgbtrf.c + lapacke_cgbtrf_work.c + lapacke_cgbtrs.c + lapacke_cgbtrs_work.c + lapacke_cgebak.c + lapacke_cgebak_work.c + lapacke_cgebal.c + lapacke_cgebal_work.c + lapacke_cgebrd.c + lapacke_cgebrd_work.c + lapacke_cgecon.c + lapacke_cgecon_work.c + lapacke_cgeequ.c + lapacke_cgeequ_work.c + lapacke_cgeequb.c + lapacke_cgeequb_work.c + lapacke_cgees.c + lapacke_cgees_work.c + lapacke_cgeesx.c + lapacke_cgeesx_work.c + lapacke_cgeev.c + lapacke_cgeev_work.c + lapacke_cgeevx.c + lapacke_cgeevx_work.c + lapacke_cgehrd.c + lapacke_cgehrd_work.c + lapacke_cgelq2.c + lapacke_cgelq2_work.c + lapacke_cgelqf.c + lapacke_cgelqf_work.c + lapacke_cgels.c + lapacke_cgels_work.c + lapacke_cgelsd.c + lapacke_cgelsd_work.c + lapacke_cgelss.c + lapacke_cgelss_work.c + lapacke_cgelsy.c + lapacke_cgelsy_work.c + lapacke_cgemqrt.c + lapacke_cgemqrt_work.c + lapacke_cgeqlf.c + lapacke_cgeqlf_work.c + lapacke_cgeqp3.c + lapacke_cgeqp3_work.c + lapacke_cgeqpf.c + lapacke_cgeqpf_work.c + lapacke_cgeqr2.c + lapacke_cgeqr2_work.c + lapacke_cgeqrf.c + lapacke_cgeqrf_work.c + lapacke_cgeqrfp.c + lapacke_cgeqrfp_work.c + lapacke_cgeqrt.c + lapacke_cgeqrt2.c + lapacke_cgeqrt2_work.c + lapacke_cgeqrt3.c + lapacke_cgeqrt3_work.c + lapacke_cgeqrt_work.c + lapacke_cgerfs.c + lapacke_cgerfs_work.c + lapacke_cgerqf.c + lapacke_cgerqf_work.c + lapacke_cgesdd.c + lapacke_cgesdd_work.c + lapacke_cgesv.c + lapacke_cgesv_work.c + lapacke_cgesvd.c + lapacke_cgesvd_work.c + lapacke_cgesvx.c + lapacke_cgesvx_work.c + lapacke_cgetf2.c + lapacke_cgetf2_work.c + lapacke_cgetrf.c + lapacke_cgetrf_work.c + lapacke_cgetri.c + lapacke_cgetri_work.c + lapacke_cgetrs.c + 
lapacke_cgetrs_work.c + lapacke_cggbak.c + lapacke_cggbak_work.c + lapacke_cggbal.c + lapacke_cggbal_work.c + lapacke_cgges.c + lapacke_cgges_work.c + lapacke_cggesx.c + lapacke_cggesx_work.c + lapacke_cggev.c + lapacke_cggev_work.c + lapacke_cggevx.c + lapacke_cggevx_work.c + lapacke_cggglm.c + lapacke_cggglm_work.c + lapacke_cgghrd.c + lapacke_cgghrd_work.c + lapacke_cgglse.c + lapacke_cgglse_work.c + lapacke_cggqrf.c + lapacke_cggqrf_work.c + lapacke_cggrqf.c + lapacke_cggrqf_work.c + lapacke_cggsvd.c + lapacke_cggsvd_work.c + lapacke_cggsvp.c + lapacke_cggsvp_work.c + lapacke_cgtcon.c + lapacke_cgtcon_work.c + lapacke_cgtrfs.c + lapacke_cgtrfs_work.c + lapacke_cgtsv.c + lapacke_cgtsv_work.c + lapacke_cgtsvx.c + lapacke_cgtsvx_work.c + lapacke_cgttrf.c + lapacke_cgttrf_work.c + lapacke_cgttrs.c + lapacke_cgttrs_work.c + lapacke_chbev.c + lapacke_chbev_work.c + lapacke_chbevd.c + lapacke_chbevd_work.c + lapacke_chbevx.c + lapacke_chbevx_work.c + lapacke_chbgst.c + lapacke_chbgst_work.c + lapacke_chbgv.c + lapacke_chbgv_work.c + lapacke_chbgvd.c + lapacke_chbgvd_work.c + lapacke_chbgvx.c + lapacke_chbgvx_work.c + lapacke_chbtrd.c + lapacke_chbtrd_work.c + lapacke_checon.c + lapacke_checon_work.c + lapacke_cheequb.c + lapacke_cheequb_work.c + lapacke_cheev.c + lapacke_cheev_work.c + lapacke_cheevd.c + lapacke_cheevd_work.c + lapacke_cheevr.c + lapacke_cheevr_work.c + lapacke_cheevx.c + lapacke_cheevx_work.c + lapacke_chegst.c + lapacke_chegst_work.c + lapacke_chegv.c + lapacke_chegv_work.c + lapacke_chegvd.c + lapacke_chegvd_work.c + lapacke_chegvx.c + lapacke_chegvx_work.c + lapacke_cherfs.c + lapacke_cherfs_work.c + lapacke_chesv.c + lapacke_chesv_work.c + lapacke_chesvx.c + lapacke_chesvx_work.c + lapacke_cheswapr.c + lapacke_cheswapr_work.c + lapacke_chetrd.c + lapacke_chetrd_work.c + lapacke_chetrf.c + lapacke_chetrf_work.c + lapacke_chetri.c + lapacke_chetri2.c + lapacke_chetri2_work.c + lapacke_chetri2x.c + lapacke_chetri2x_work.c + lapacke_chetri_work.c + 
lapacke_chetrs.c + lapacke_chetrs2.c + lapacke_chetrs2_work.c + lapacke_chetrs_work.c + lapacke_chfrk.c + lapacke_chfrk_work.c + lapacke_chgeqz.c + lapacke_chgeqz_work.c + lapacke_chpcon.c + lapacke_chpcon_work.c + lapacke_chpev.c + lapacke_chpev_work.c + lapacke_chpevd.c + lapacke_chpevd_work.c + lapacke_chpevx.c + lapacke_chpevx_work.c + lapacke_chpgst.c + lapacke_chpgst_work.c + lapacke_chpgv.c + lapacke_chpgv_work.c + lapacke_chpgvd.c + lapacke_chpgvd_work.c + lapacke_chpgvx.c + lapacke_chpgvx_work.c + lapacke_chprfs.c + lapacke_chprfs_work.c + lapacke_chpsv.c + lapacke_chpsv_work.c + lapacke_chpsvx.c + lapacke_chpsvx_work.c + lapacke_chptrd.c + lapacke_chptrd_work.c + lapacke_chptrf.c + lapacke_chptrf_work.c + lapacke_chptri.c + lapacke_chptri_work.c + lapacke_chptrs.c + lapacke_chptrs_work.c + lapacke_chsein.c + lapacke_chsein_work.c + lapacke_chseqr.c + lapacke_chseqr_work.c + lapacke_clacgv.c + lapacke_clacgv_work.c + lapacke_clacn2.c + lapacke_clacn2_work.c + lapacke_clacp2.c + lapacke_clacp2_work.c + lapacke_clacpy.c + lapacke_clacpy_work.c + lapacke_clag2z.c + lapacke_clag2z_work.c + lapacke_clange.c + lapacke_clange_work.c + lapacke_clanhe.c + lapacke_clanhe_work.c + lapacke_clansy.c + lapacke_clansy_work.c + lapacke_clantr.c + lapacke_clantr_work.c + lapacke_clapmr.c + lapacke_clapmr_work.c + lapacke_clarfb.c + lapacke_clarfb_work.c + lapacke_clarfg.c + lapacke_clarfg_work.c + lapacke_clarft.c + lapacke_clarft_work.c + lapacke_clarfx.c + lapacke_clarfx_work.c + lapacke_clarnv.c + lapacke_clarnv_work.c + lapacke_claset.c + lapacke_claset_work.c + lapacke_claswp.c + lapacke_claswp_work.c + lapacke_clauum.c + lapacke_clauum_work.c + lapacke_cpbcon.c + lapacke_cpbcon_work.c + lapacke_cpbequ.c + lapacke_cpbequ_work.c + lapacke_cpbrfs.c + lapacke_cpbrfs_work.c + lapacke_cpbstf.c + lapacke_cpbstf_work.c + lapacke_cpbsv.c + lapacke_cpbsv_work.c + lapacke_cpbsvx.c + lapacke_cpbsvx_work.c + lapacke_cpbtrf.c + lapacke_cpbtrf_work.c + lapacke_cpbtrs.c + 
lapacke_cpbtrs_work.c + lapacke_cpftrf.c + lapacke_cpftrf_work.c + lapacke_cpftri.c + lapacke_cpftri_work.c + lapacke_cpftrs.c + lapacke_cpftrs_work.c + lapacke_cpocon.c + lapacke_cpocon_work.c + lapacke_cpoequ.c + lapacke_cpoequ_work.c + lapacke_cpoequb.c + lapacke_cpoequb_work.c + lapacke_cporfs.c + lapacke_cporfs_work.c + lapacke_cposv.c + lapacke_cposv_work.c + lapacke_cposvx.c + lapacke_cposvx_work.c + lapacke_cpotrf.c + lapacke_cpotrf_work.c + lapacke_cpotri.c + lapacke_cpotri_work.c + lapacke_cpotrs.c + lapacke_cpotrs_work.c + lapacke_cppcon.c + lapacke_cppcon_work.c + lapacke_cppequ.c + lapacke_cppequ_work.c + lapacke_cpprfs.c + lapacke_cpprfs_work.c + lapacke_cppsv.c + lapacke_cppsv_work.c + lapacke_cppsvx.c + lapacke_cppsvx_work.c + lapacke_cpptrf.c + lapacke_cpptrf_work.c + lapacke_cpptri.c + lapacke_cpptri_work.c + lapacke_cpptrs.c + lapacke_cpptrs_work.c + lapacke_cpstrf.c + lapacke_cpstrf_work.c + lapacke_cptcon.c + lapacke_cptcon_work.c + lapacke_cpteqr.c + lapacke_cpteqr_work.c + lapacke_cptrfs.c + lapacke_cptrfs_work.c + lapacke_cptsv.c + lapacke_cptsv_work.c + lapacke_cptsvx.c + lapacke_cptsvx_work.c + lapacke_cpttrf.c + lapacke_cpttrf_work.c + lapacke_cpttrs.c + lapacke_cpttrs_work.c + lapacke_cspcon.c + lapacke_cspcon_work.c + lapacke_csprfs.c + lapacke_csprfs_work.c + lapacke_cspsv.c + lapacke_cspsv_work.c + lapacke_cspsvx.c + lapacke_cspsvx_work.c + lapacke_csptrf.c + lapacke_csptrf_work.c + lapacke_csptri.c + lapacke_csptri_work.c + lapacke_csptrs.c + lapacke_csptrs_work.c + lapacke_cstedc.c + lapacke_cstedc_work.c + lapacke_cstegr.c + lapacke_cstegr_work.c + lapacke_cstein.c + lapacke_cstein_work.c + lapacke_cstemr.c + lapacke_cstemr_work.c + lapacke_csteqr.c + lapacke_csteqr_work.c + lapacke_csycon.c + lapacke_csycon_work.c + lapacke_csyconv.c + lapacke_csyconv_work.c + lapacke_csyequb.c + lapacke_csyequb_work.c + lapacke_csyrfs.c + lapacke_csyrfs_work.c + lapacke_csysv.c + lapacke_csysv_rook.c + lapacke_csysv_rook_work.c + 
lapacke_csysv_work.c + lapacke_csysvx.c + lapacke_csysvx_work.c + lapacke_csyswapr.c + lapacke_csyswapr_work.c + lapacke_csytrf.c + lapacke_csytrf_work.c + lapacke_csytri.c + lapacke_csytri2.c + lapacke_csytri2_work.c + lapacke_csytri2x.c + lapacke_csytri2x_work.c + lapacke_csytri_work.c + lapacke_csytrs.c + lapacke_csytrs2.c + lapacke_csytrs2_work.c + lapacke_csytrs_work.c + lapacke_ctbcon.c + lapacke_ctbcon_work.c + lapacke_ctbrfs.c + lapacke_ctbrfs_work.c + lapacke_ctbtrs.c + lapacke_ctbtrs_work.c + lapacke_ctfsm.c + lapacke_ctfsm_work.c + lapacke_ctftri.c + lapacke_ctftri_work.c + lapacke_ctfttp.c + lapacke_ctfttp_work.c + lapacke_ctfttr.c + lapacke_ctfttr_work.c + lapacke_ctgevc.c + lapacke_ctgevc_work.c + lapacke_ctgexc.c + lapacke_ctgexc_work.c + lapacke_ctgsen.c + lapacke_ctgsen_work.c + lapacke_ctgsja.c + lapacke_ctgsja_work.c + lapacke_ctgsna.c + lapacke_ctgsna_work.c + lapacke_ctgsyl.c + lapacke_ctgsyl_work.c + lapacke_ctpcon.c + lapacke_ctpcon_work.c + lapacke_ctpmqrt.c + lapacke_ctpmqrt_work.c + lapacke_ctpqrt.c + lapacke_ctpqrt2.c + lapacke_ctpqrt2_work.c + lapacke_ctpqrt_work.c + lapacke_ctprfb.c + lapacke_ctprfb_work.c + lapacke_ctprfs.c + lapacke_ctprfs_work.c + lapacke_ctptri.c + lapacke_ctptri_work.c + lapacke_ctptrs.c + lapacke_ctptrs_work.c + lapacke_ctpttf.c + lapacke_ctpttf_work.c + lapacke_ctpttr.c + lapacke_ctpttr_work.c + lapacke_ctrcon.c + lapacke_ctrcon_work.c + lapacke_ctrevc.c + lapacke_ctrevc_work.c + lapacke_ctrexc.c + lapacke_ctrexc_work.c + lapacke_ctrrfs.c + lapacke_ctrrfs_work.c + lapacke_ctrsen.c + lapacke_ctrsen_work.c + lapacke_ctrsna.c + lapacke_ctrsna_work.c + lapacke_ctrsyl.c + lapacke_ctrsyl_work.c + lapacke_ctrtri.c + lapacke_ctrtri_work.c + lapacke_ctrtrs.c + lapacke_ctrtrs_work.c + lapacke_ctrttf.c + lapacke_ctrttf_work.c + lapacke_ctrttp.c + lapacke_ctrttp_work.c + lapacke_ctzrzf.c + lapacke_ctzrzf_work.c + lapacke_cunbdb.c + lapacke_cunbdb_work.c + lapacke_cuncsd.c + lapacke_cuncsd_work.c + lapacke_cungbr.c + 
lapacke_cungbr_work.c + lapacke_cunghr.c + lapacke_cunghr_work.c + lapacke_cunglq.c + lapacke_cunglq_work.c + lapacke_cungql.c + lapacke_cungql_work.c + lapacke_cungqr.c + lapacke_cungqr_work.c + lapacke_cungrq.c + lapacke_cungrq_work.c + lapacke_cungtr.c + lapacke_cungtr_work.c + lapacke_cunmbr.c + lapacke_cunmbr_work.c + lapacke_cunmhr.c + lapacke_cunmhr_work.c + lapacke_cunmlq.c + lapacke_cunmlq_work.c + lapacke_cunmql.c + lapacke_cunmql_work.c + lapacke_cunmqr.c + lapacke_cunmqr_work.c + lapacke_cunmrq.c + lapacke_cunmrq_work.c + lapacke_cunmrz.c + lapacke_cunmrz_work.c + lapacke_cunmtr.c + lapacke_cunmtr_work.c + lapacke_cupgtr.c + lapacke_cupgtr_work.c + lapacke_cupmtr.c + lapacke_cupmtr_work.c +) + +set(DSRC + lapacke_dbbcsd.c + lapacke_dbbcsd_work.c + lapacke_dbdsdc.c + lapacke_dbdsdc_work.c + lapacke_dbdsqr.c + lapacke_dbdsqr_work.c + lapacke_ddisna.c + lapacke_ddisna_work.c + lapacke_dgbbrd.c + lapacke_dgbbrd_work.c + lapacke_dgbcon.c + lapacke_dgbcon_work.c + lapacke_dgbequ.c + lapacke_dgbequ_work.c + lapacke_dgbequb.c + lapacke_dgbequb_work.c + lapacke_dgbrfs.c + lapacke_dgbrfs_work.c + lapacke_dgbsv.c + lapacke_dgbsv_work.c + lapacke_dgbsvx.c + lapacke_dgbsvx_work.c + lapacke_dgbtrf.c + lapacke_dgbtrf_work.c + lapacke_dgbtrs.c + lapacke_dgbtrs_work.c + lapacke_dgebak.c + lapacke_dgebak_work.c + lapacke_dgebal.c + lapacke_dgebal_work.c + lapacke_dgebrd.c + lapacke_dgebrd_work.c + lapacke_dgecon.c + lapacke_dgecon_work.c + lapacke_dgeequ.c + lapacke_dgeequ_work.c + lapacke_dgeequb.c + lapacke_dgeequb_work.c + lapacke_dgees.c + lapacke_dgees_work.c + lapacke_dgeesx.c + lapacke_dgeesx_work.c + lapacke_dgeev.c + lapacke_dgeev_work.c + lapacke_dgeevx.c + lapacke_dgeevx_work.c + lapacke_dgehrd.c + lapacke_dgehrd_work.c + lapacke_dgejsv.c + lapacke_dgejsv_work.c + lapacke_dgelq2.c + lapacke_dgelq2_work.c + lapacke_dgelqf.c + lapacke_dgelqf_work.c + lapacke_dgels.c + lapacke_dgels_work.c + lapacke_dgelsd.c + lapacke_dgelsd_work.c + lapacke_dgelss.c + 
lapacke_dgelss_work.c + lapacke_dgelsy.c + lapacke_dgelsy_work.c + lapacke_dgemqrt.c + lapacke_dgemqrt_work.c + lapacke_dgeqlf.c + lapacke_dgeqlf_work.c + lapacke_dgeqp3.c + lapacke_dgeqp3_work.c + lapacke_dgeqpf.c + lapacke_dgeqpf_work.c + lapacke_dgeqr2.c + lapacke_dgeqr2_work.c + lapacke_dgeqrf.c + lapacke_dgeqrf_work.c + lapacke_dgeqrfp.c + lapacke_dgeqrfp_work.c + lapacke_dgeqrt.c + lapacke_dgeqrt2.c + lapacke_dgeqrt2_work.c + lapacke_dgeqrt3.c + lapacke_dgeqrt3_work.c + lapacke_dgeqrt_work.c + lapacke_dgerfs.c + lapacke_dgerfs_work.c + lapacke_dgerqf.c + lapacke_dgerqf_work.c + lapacke_dgesdd.c + lapacke_dgesdd_work.c + lapacke_dgesv.c + lapacke_dgesv_work.c + lapacke_dgesvd.c + lapacke_dgesvd_work.c + lapacke_dgesvj.c + lapacke_dgesvj_work.c + lapacke_dgesvx.c + lapacke_dgesvx_work.c + lapacke_dgetf2.c + lapacke_dgetf2_work.c + lapacke_dgetrf.c + lapacke_dgetrf_work.c + lapacke_dgetri.c + lapacke_dgetri_work.c + lapacke_dgetrs.c + lapacke_dgetrs_work.c + lapacke_dggbak.c + lapacke_dggbak_work.c + lapacke_dggbal.c + lapacke_dggbal_work.c + lapacke_dgges.c + lapacke_dgges_work.c + lapacke_dggesx.c + lapacke_dggesx_work.c + lapacke_dggev.c + lapacke_dggev_work.c + lapacke_dggevx.c + lapacke_dggevx_work.c + lapacke_dggglm.c + lapacke_dggglm_work.c + lapacke_dgghrd.c + lapacke_dgghrd_work.c + lapacke_dgglse.c + lapacke_dgglse_work.c + lapacke_dggqrf.c + lapacke_dggqrf_work.c + lapacke_dggrqf.c + lapacke_dggrqf_work.c + lapacke_dggsvd.c + lapacke_dggsvd_work.c + lapacke_dggsvp.c + lapacke_dggsvp_work.c + lapacke_dgtcon.c + lapacke_dgtcon_work.c + lapacke_dgtrfs.c + lapacke_dgtrfs_work.c + lapacke_dgtsv.c + lapacke_dgtsv_work.c + lapacke_dgtsvx.c + lapacke_dgtsvx_work.c + lapacke_dgttrf.c + lapacke_dgttrf_work.c + lapacke_dgttrs.c + lapacke_dgttrs_work.c + lapacke_dhgeqz.c + lapacke_dhgeqz_work.c + lapacke_dhsein.c + lapacke_dhsein_work.c + lapacke_dhseqr.c + lapacke_dhseqr_work.c + lapacke_dlacn2.c + lapacke_dlacn2_work.c + lapacke_dlacpy.c + lapacke_dlacpy_work.c 
+ lapacke_dlag2s.c + lapacke_dlag2s_work.c + lapacke_dlamch.c + lapacke_dlamch_work.c + lapacke_dlange.c + lapacke_dlange_work.c + lapacke_dlansy.c + lapacke_dlansy_work.c + lapacke_dlantr.c + lapacke_dlantr_work.c + lapacke_dlapmr.c + lapacke_dlapmr_work.c + lapacke_dlapy2.c + lapacke_dlapy2_work.c + lapacke_dlapy3.c + lapacke_dlapy3_work.c + lapacke_dlarfb.c + lapacke_dlarfb_work.c + lapacke_dlarfg.c + lapacke_dlarfg_work.c + lapacke_dlarft.c + lapacke_dlarft_work.c + lapacke_dlarfx.c + lapacke_dlarfx_work.c + lapacke_dlarnv.c + lapacke_dlarnv_work.c + lapacke_dlartgp.c + lapacke_dlartgp_work.c + lapacke_dlartgs.c + lapacke_dlartgs_work.c + lapacke_dlaset.c + lapacke_dlaset_work.c + lapacke_dlasrt.c + lapacke_dlasrt_work.c + lapacke_dlaswp.c + lapacke_dlaswp_work.c + lapacke_dlauum.c + lapacke_dlauum_work.c + lapacke_dopgtr.c + lapacke_dopgtr_work.c + lapacke_dopmtr.c + lapacke_dopmtr_work.c + lapacke_dorbdb.c + lapacke_dorbdb_work.c + lapacke_dorcsd.c + lapacke_dorcsd_work.c + lapacke_dorgbr.c + lapacke_dorgbr_work.c + lapacke_dorghr.c + lapacke_dorghr_work.c + lapacke_dorglq.c + lapacke_dorglq_work.c + lapacke_dorgql.c + lapacke_dorgql_work.c + lapacke_dorgqr.c + lapacke_dorgqr_work.c + lapacke_dorgrq.c + lapacke_dorgrq_work.c + lapacke_dorgtr.c + lapacke_dorgtr_work.c + lapacke_dormbr.c + lapacke_dormbr_work.c + lapacke_dormhr.c + lapacke_dormhr_work.c + lapacke_dormlq.c + lapacke_dormlq_work.c + lapacke_dormql.c + lapacke_dormql_work.c + lapacke_dormqr.c + lapacke_dormqr_work.c + lapacke_dormrq.c + lapacke_dormrq_work.c + lapacke_dormrz.c + lapacke_dormrz_work.c + lapacke_dormtr.c + lapacke_dormtr_work.c + lapacke_dpbcon.c + lapacke_dpbcon_work.c + lapacke_dpbequ.c + lapacke_dpbequ_work.c + lapacke_dpbrfs.c + lapacke_dpbrfs_work.c + lapacke_dpbstf.c + lapacke_dpbstf_work.c + lapacke_dpbsv.c + lapacke_dpbsv_work.c + lapacke_dpbsvx.c + lapacke_dpbsvx_work.c + lapacke_dpbtrf.c + lapacke_dpbtrf_work.c + lapacke_dpbtrs.c + lapacke_dpbtrs_work.c + lapacke_dpftrf.c 
+ lapacke_dpftrf_work.c + lapacke_dpftri.c + lapacke_dpftri_work.c + lapacke_dpftrs.c + lapacke_dpftrs_work.c + lapacke_dpocon.c + lapacke_dpocon_work.c + lapacke_dpoequ.c + lapacke_dpoequ_work.c + lapacke_dpoequb.c + lapacke_dpoequb_work.c + lapacke_dporfs.c + lapacke_dporfs_work.c + lapacke_dposv.c + lapacke_dposv_work.c + lapacke_dposvx.c + lapacke_dposvx_work.c + lapacke_dpotrf.c + lapacke_dpotrf_work.c + lapacke_dpotri.c + lapacke_dpotri_work.c + lapacke_dpotrs.c + lapacke_dpotrs_work.c + lapacke_dppcon.c + lapacke_dppcon_work.c + lapacke_dppequ.c + lapacke_dppequ_work.c + lapacke_dpprfs.c + lapacke_dpprfs_work.c + lapacke_dppsv.c + lapacke_dppsv_work.c + lapacke_dppsvx.c + lapacke_dppsvx_work.c + lapacke_dpptrf.c + lapacke_dpptrf_work.c + lapacke_dpptri.c + lapacke_dpptri_work.c + lapacke_dpptrs.c + lapacke_dpptrs_work.c + lapacke_dpstrf.c + lapacke_dpstrf_work.c + lapacke_dptcon.c + lapacke_dptcon_work.c + lapacke_dpteqr.c + lapacke_dpteqr_work.c + lapacke_dptrfs.c + lapacke_dptrfs_work.c + lapacke_dptsv.c + lapacke_dptsv_work.c + lapacke_dptsvx.c + lapacke_dptsvx_work.c + lapacke_dpttrf.c + lapacke_dpttrf_work.c + lapacke_dpttrs.c + lapacke_dpttrs_work.c + lapacke_dsbev.c + lapacke_dsbev_work.c + lapacke_dsbevd.c + lapacke_dsbevd_work.c + lapacke_dsbevx.c + lapacke_dsbevx_work.c + lapacke_dsbgst.c + lapacke_dsbgst_work.c + lapacke_dsbgv.c + lapacke_dsbgv_work.c + lapacke_dsbgvd.c + lapacke_dsbgvd_work.c + lapacke_dsbgvx.c + lapacke_dsbgvx_work.c + lapacke_dsbtrd.c + lapacke_dsbtrd_work.c + lapacke_dsfrk.c + lapacke_dsfrk_work.c + lapacke_dsgesv.c + lapacke_dsgesv_work.c + lapacke_dspcon.c + lapacke_dspcon_work.c + lapacke_dspev.c + lapacke_dspev_work.c + lapacke_dspevd.c + lapacke_dspevd_work.c + lapacke_dspevx.c + lapacke_dspevx_work.c + lapacke_dspgst.c + lapacke_dspgst_work.c + lapacke_dspgv.c + lapacke_dspgv_work.c + lapacke_dspgvd.c + lapacke_dspgvd_work.c + lapacke_dspgvx.c + lapacke_dspgvx_work.c + lapacke_dsposv.c + lapacke_dsposv_work.c + 
lapacke_dsprfs.c + lapacke_dsprfs_work.c + lapacke_dspsv.c + lapacke_dspsv_work.c + lapacke_dspsvx.c + lapacke_dspsvx_work.c + lapacke_dsptrd.c + lapacke_dsptrd_work.c + lapacke_dsptrf.c + lapacke_dsptrf_work.c + lapacke_dsptri.c + lapacke_dsptri_work.c + lapacke_dsptrs.c + lapacke_dsptrs_work.c + lapacke_dstebz.c + lapacke_dstebz_work.c + lapacke_dstedc.c + lapacke_dstedc_work.c + lapacke_dstegr.c + lapacke_dstegr_work.c + lapacke_dstein.c + lapacke_dstein_work.c + lapacke_dstemr.c + lapacke_dstemr_work.c + lapacke_dsteqr.c + lapacke_dsteqr_work.c + lapacke_dsterf.c + lapacke_dsterf_work.c + lapacke_dstev.c + lapacke_dstev_work.c + lapacke_dstevd.c + lapacke_dstevd_work.c + lapacke_dstevr.c + lapacke_dstevr_work.c + lapacke_dstevx.c + lapacke_dstevx_work.c + lapacke_dsycon.c + lapacke_dsycon_work.c + lapacke_dsyconv.c + lapacke_dsyconv_work.c + lapacke_dsyequb.c + lapacke_dsyequb_work.c + lapacke_dsyev.c + lapacke_dsyev_work.c + lapacke_dsyevd.c + lapacke_dsyevd_work.c + lapacke_dsyevr.c + lapacke_dsyevr_work.c + lapacke_dsyevx.c + lapacke_dsyevx_work.c + lapacke_dsygst.c + lapacke_dsygst_work.c + lapacke_dsygv.c + lapacke_dsygv_work.c + lapacke_dsygvd.c + lapacke_dsygvd_work.c + lapacke_dsygvx.c + lapacke_dsygvx_work.c + lapacke_dsyrfs.c + lapacke_dsyrfs_work.c + lapacke_dsysv.c + lapacke_dsysv_rook.c + lapacke_dsysv_rook_work.c + lapacke_dsysv_work.c + lapacke_dsysvx.c + lapacke_dsysvx_work.c + lapacke_dsyswapr.c + lapacke_dsyswapr_work.c + lapacke_dsytrd.c + lapacke_dsytrd_work.c + lapacke_dsytrf.c + lapacke_dsytrf_work.c + lapacke_dsytri.c + lapacke_dsytri2.c + lapacke_dsytri2_work.c + lapacke_dsytri2x.c + lapacke_dsytri2x_work.c + lapacke_dsytri_work.c + lapacke_dsytrs.c + lapacke_dsytrs2.c + lapacke_dsytrs2_work.c + lapacke_dsytrs_work.c + lapacke_dtbcon.c + lapacke_dtbcon_work.c + lapacke_dtbrfs.c + lapacke_dtbrfs_work.c + lapacke_dtbtrs.c + lapacke_dtbtrs_work.c + lapacke_dtfsm.c + lapacke_dtfsm_work.c + lapacke_dtftri.c + lapacke_dtftri_work.c + 
lapacke_dtfttp.c + lapacke_dtfttp_work.c + lapacke_dtfttr.c + lapacke_dtfttr_work.c + lapacke_dtgevc.c + lapacke_dtgevc_work.c + lapacke_dtgexc.c + lapacke_dtgexc_work.c + lapacke_dtgsen.c + lapacke_dtgsen_work.c + lapacke_dtgsja.c + lapacke_dtgsja_work.c + lapacke_dtgsna.c + lapacke_dtgsna_work.c + lapacke_dtgsyl.c + lapacke_dtgsyl_work.c + lapacke_dtpcon.c + lapacke_dtpcon_work.c + lapacke_dtpmqrt.c + lapacke_dtpmqrt_work.c + lapacke_dtpqrt.c + lapacke_dtpqrt2.c + lapacke_dtpqrt2_work.c + lapacke_dtpqrt_work.c + lapacke_dtprfb.c + lapacke_dtprfb_work.c + lapacke_dtprfs.c + lapacke_dtprfs_work.c + lapacke_dtptri.c + lapacke_dtptri_work.c + lapacke_dtptrs.c + lapacke_dtptrs_work.c + lapacke_dtpttf.c + lapacke_dtpttf_work.c + lapacke_dtpttr.c + lapacke_dtpttr_work.c + lapacke_dtrcon.c + lapacke_dtrcon_work.c + lapacke_dtrevc.c + lapacke_dtrevc_work.c + lapacke_dtrexc.c + lapacke_dtrexc_work.c + lapacke_dtrrfs.c + lapacke_dtrrfs_work.c + lapacke_dtrsen.c + lapacke_dtrsen_work.c + lapacke_dtrsna.c + lapacke_dtrsna_work.c + lapacke_dtrsyl.c + lapacke_dtrsyl_work.c + lapacke_dtrtri.c + lapacke_dtrtri_work.c + lapacke_dtrtrs.c + lapacke_dtrtrs_work.c + lapacke_dtrttf.c + lapacke_dtrttf_work.c + lapacke_dtrttp.c + lapacke_dtrttp_work.c + lapacke_dtzrzf.c + lapacke_dtzrzf_work.c +) + +set(SSRC + lapacke_sbbcsd.c + lapacke_sbbcsd_work.c + lapacke_sbdsdc.c + lapacke_sbdsdc_work.c + lapacke_sbdsqr.c + lapacke_sbdsqr_work.c + lapacke_sdisna.c + lapacke_sdisna_work.c + lapacke_sgbbrd.c + lapacke_sgbbrd_work.c + lapacke_sgbcon.c + lapacke_sgbcon_work.c + lapacke_sgbequ.c + lapacke_sgbequ_work.c + lapacke_sgbequb.c + lapacke_sgbequb_work.c + lapacke_sgbrfs.c + lapacke_sgbrfs_work.c + lapacke_sgbsv.c + lapacke_sgbsv_work.c + lapacke_sgbsvx.c + lapacke_sgbsvx_work.c + lapacke_sgbtrf.c + lapacke_sgbtrf_work.c + lapacke_sgbtrs.c + lapacke_sgbtrs_work.c + lapacke_sgebak.c + lapacke_sgebak_work.c + lapacke_sgebal.c + lapacke_sgebal_work.c + lapacke_sgebrd.c + lapacke_sgebrd_work.c + 
lapacke_sgecon.c + lapacke_sgecon_work.c + lapacke_sgeequ.c + lapacke_sgeequ_work.c + lapacke_sgeequb.c + lapacke_sgeequb_work.c + lapacke_sgees.c + lapacke_sgees_work.c + lapacke_sgeesx.c + lapacke_sgeesx_work.c + lapacke_sgeev.c + lapacke_sgeev_work.c + lapacke_sgeevx.c + lapacke_sgeevx_work.c + lapacke_sgehrd.c + lapacke_sgehrd_work.c + lapacke_sgejsv.c + lapacke_sgejsv_work.c + lapacke_sgelq2.c + lapacke_sgelq2_work.c + lapacke_sgelqf.c + lapacke_sgelqf_work.c + lapacke_sgels.c + lapacke_sgels_work.c + lapacke_sgelsd.c + lapacke_sgelsd_work.c + lapacke_sgelss.c + lapacke_sgelss_work.c + lapacke_sgelsy.c + lapacke_sgelsy_work.c + lapacke_sgemqrt.c + lapacke_sgemqrt_work.c + lapacke_sgeqlf.c + lapacke_sgeqlf_work.c + lapacke_sgeqp3.c + lapacke_sgeqp3_work.c + lapacke_sgeqpf.c + lapacke_sgeqpf_work.c + lapacke_sgeqr2.c + lapacke_sgeqr2_work.c + lapacke_sgeqrf.c + lapacke_sgeqrf_work.c + lapacke_sgeqrfp.c + lapacke_sgeqrfp_work.c + lapacke_sgeqrt.c + lapacke_sgeqrt2.c + lapacke_sgeqrt2_work.c + lapacke_sgeqrt3.c + lapacke_sgeqrt3_work.c + lapacke_sgeqrt_work.c + lapacke_sgerfs.c + lapacke_sgerfs_work.c + lapacke_sgerqf.c + lapacke_sgerqf_work.c + lapacke_sgesdd.c + lapacke_sgesdd_work.c + lapacke_sgesv.c + lapacke_sgesv_work.c + lapacke_sgesvd.c + lapacke_sgesvd_work.c + lapacke_sgesvj.c + lapacke_sgesvj_work.c + lapacke_sgesvx.c + lapacke_sgesvx_work.c + lapacke_sgetf2.c + lapacke_sgetf2_work.c + lapacke_sgetrf.c + lapacke_sgetrf_work.c + lapacke_sgetri.c + lapacke_sgetri_work.c + lapacke_sgetrs.c + lapacke_sgetrs_work.c + lapacke_sggbak.c + lapacke_sggbak_work.c + lapacke_sggbal.c + lapacke_sggbal_work.c + lapacke_sgges.c + lapacke_sgges_work.c + lapacke_sggesx.c + lapacke_sggesx_work.c + lapacke_sggev.c + lapacke_sggev_work.c + lapacke_sggevx.c + lapacke_sggevx_work.c + lapacke_sggglm.c + lapacke_sggglm_work.c + lapacke_sgghrd.c + lapacke_sgghrd_work.c + lapacke_sgglse.c + lapacke_sgglse_work.c + lapacke_sggqrf.c + lapacke_sggqrf_work.c + lapacke_sggrqf.c + 
lapacke_sggrqf_work.c + lapacke_sggsvd.c + lapacke_sggsvd_work.c + lapacke_sggsvp.c + lapacke_sggsvp_work.c + lapacke_sgtcon.c + lapacke_sgtcon_work.c + lapacke_sgtrfs.c + lapacke_sgtrfs_work.c + lapacke_sgtsv.c + lapacke_sgtsv_work.c + lapacke_sgtsvx.c + lapacke_sgtsvx_work.c + lapacke_sgttrf.c + lapacke_sgttrf_work.c + lapacke_sgttrs.c + lapacke_sgttrs_work.c + lapacke_shgeqz.c + lapacke_shgeqz_work.c + lapacke_shsein.c + lapacke_shsein_work.c + lapacke_shseqr.c + lapacke_shseqr_work.c + lapacke_slacn2.c + lapacke_slacn2_work.c + lapacke_slacpy.c + lapacke_slacpy_work.c + lapacke_slag2d.c + lapacke_slag2d_work.c + lapacke_slamch.c + lapacke_slamch_work.c + lapacke_slange.c + lapacke_slange_work.c + lapacke_slansy.c + lapacke_slansy_work.c + lapacke_slantr.c + lapacke_slantr_work.c + lapacke_slapmr.c + lapacke_slapmr_work.c + lapacke_slapy2.c + lapacke_slapy2_work.c + lapacke_slapy3.c + lapacke_slapy3_work.c + lapacke_slarfb.c + lapacke_slarfb_work.c + lapacke_slarfg.c + lapacke_slarfg_work.c + lapacke_slarft.c + lapacke_slarft_work.c + lapacke_slarfx.c + lapacke_slarfx_work.c + lapacke_slarnv.c + lapacke_slarnv_work.c + lapacke_slartgp.c + lapacke_slartgp_work.c + lapacke_slartgs.c + lapacke_slartgs_work.c + lapacke_slaset.c + lapacke_slaset_work.c + lapacke_slasrt.c + lapacke_slasrt_work.c + lapacke_slaswp.c + lapacke_slaswp_work.c + lapacke_slauum.c + lapacke_slauum_work.c + lapacke_sopgtr.c + lapacke_sopgtr_work.c + lapacke_sopmtr.c + lapacke_sopmtr_work.c + lapacke_sorbdb.c + lapacke_sorbdb_work.c + lapacke_sorcsd.c + lapacke_sorcsd_work.c + lapacke_sorgbr.c + lapacke_sorgbr_work.c + lapacke_sorghr.c + lapacke_sorghr_work.c + lapacke_sorglq.c + lapacke_sorglq_work.c + lapacke_sorgql.c + lapacke_sorgql_work.c + lapacke_sorgqr.c + lapacke_sorgqr_work.c + lapacke_sorgrq.c + lapacke_sorgrq_work.c + lapacke_sorgtr.c + lapacke_sorgtr_work.c + lapacke_sormbr.c + lapacke_sormbr_work.c + lapacke_sormhr.c + lapacke_sormhr_work.c + lapacke_sormlq.c + 
lapacke_sormlq_work.c + lapacke_sormql.c + lapacke_sormql_work.c + lapacke_sormqr.c + lapacke_sormqr_work.c + lapacke_sormrq.c + lapacke_sormrq_work.c + lapacke_sormrz.c + lapacke_sormrz_work.c + lapacke_sormtr.c + lapacke_sormtr_work.c + lapacke_spbcon.c + lapacke_spbcon_work.c + lapacke_spbequ.c + lapacke_spbequ_work.c + lapacke_spbrfs.c + lapacke_spbrfs_work.c + lapacke_spbstf.c + lapacke_spbstf_work.c + lapacke_spbsv.c + lapacke_spbsv_work.c + lapacke_spbsvx.c + lapacke_spbsvx_work.c + lapacke_spbtrf.c + lapacke_spbtrf_work.c + lapacke_spbtrs.c + lapacke_spbtrs_work.c + lapacke_spftrf.c + lapacke_spftrf_work.c + lapacke_spftri.c + lapacke_spftri_work.c + lapacke_spftrs.c + lapacke_spftrs_work.c + lapacke_spocon.c + lapacke_spocon_work.c + lapacke_spoequ.c + lapacke_spoequ_work.c + lapacke_spoequb.c + lapacke_spoequb_work.c + lapacke_sporfs.c + lapacke_sporfs_work.c + lapacke_sposv.c + lapacke_sposv_work.c + lapacke_sposvx.c + lapacke_sposvx_work.c + lapacke_spotrf.c + lapacke_spotrf_work.c + lapacke_spotri.c + lapacke_spotri_work.c + lapacke_spotrs.c + lapacke_spotrs_work.c + lapacke_sppcon.c + lapacke_sppcon_work.c + lapacke_sppequ.c + lapacke_sppequ_work.c + lapacke_spprfs.c + lapacke_spprfs_work.c + lapacke_sppsv.c + lapacke_sppsv_work.c + lapacke_sppsvx.c + lapacke_sppsvx_work.c + lapacke_spptrf.c + lapacke_spptrf_work.c + lapacke_spptri.c + lapacke_spptri_work.c + lapacke_spptrs.c + lapacke_spptrs_work.c + lapacke_spstrf.c + lapacke_spstrf_work.c + lapacke_sptcon.c + lapacke_sptcon_work.c + lapacke_spteqr.c + lapacke_spteqr_work.c + lapacke_sptrfs.c + lapacke_sptrfs_work.c + lapacke_sptsv.c + lapacke_sptsv_work.c + lapacke_sptsvx.c + lapacke_sptsvx_work.c + lapacke_spttrf.c + lapacke_spttrf_work.c + lapacke_spttrs.c + lapacke_spttrs_work.c + lapacke_ssbev.c + lapacke_ssbev_work.c + lapacke_ssbevd.c + lapacke_ssbevd_work.c + lapacke_ssbevx.c + lapacke_ssbevx_work.c + lapacke_ssbgst.c + lapacke_ssbgst_work.c + lapacke_ssbgv.c + lapacke_ssbgv_work.c + 
lapacke_ssbgvd.c + lapacke_ssbgvd_work.c + lapacke_ssbgvx.c + lapacke_ssbgvx_work.c + lapacke_ssbtrd.c + lapacke_ssbtrd_work.c + lapacke_ssfrk.c + lapacke_ssfrk_work.c + lapacke_sspcon.c + lapacke_sspcon_work.c + lapacke_sspev.c + lapacke_sspev_work.c + lapacke_sspevd.c + lapacke_sspevd_work.c + lapacke_sspevx.c + lapacke_sspevx_work.c + lapacke_sspgst.c + lapacke_sspgst_work.c + lapacke_sspgv.c + lapacke_sspgv_work.c + lapacke_sspgvd.c + lapacke_sspgvd_work.c + lapacke_sspgvx.c + lapacke_sspgvx_work.c + lapacke_ssprfs.c + lapacke_ssprfs_work.c + lapacke_sspsv.c + lapacke_sspsv_work.c + lapacke_sspsvx.c + lapacke_sspsvx_work.c + lapacke_ssptrd.c + lapacke_ssptrd_work.c + lapacke_ssptrf.c + lapacke_ssptrf_work.c + lapacke_ssptri.c + lapacke_ssptri_work.c + lapacke_ssptrs.c + lapacke_ssptrs_work.c + lapacke_sstebz.c + lapacke_sstebz_work.c + lapacke_sstedc.c + lapacke_sstedc_work.c + lapacke_sstegr.c + lapacke_sstegr_work.c + lapacke_sstein.c + lapacke_sstein_work.c + lapacke_sstemr.c + lapacke_sstemr_work.c + lapacke_ssteqr.c + lapacke_ssteqr_work.c + lapacke_ssterf.c + lapacke_ssterf_work.c + lapacke_sstev.c + lapacke_sstev_work.c + lapacke_sstevd.c + lapacke_sstevd_work.c + lapacke_sstevr.c + lapacke_sstevr_work.c + lapacke_sstevx.c + lapacke_sstevx_work.c + lapacke_ssycon.c + lapacke_ssycon_work.c + lapacke_ssyconv.c + lapacke_ssyconv_work.c + lapacke_ssyequb.c + lapacke_ssyequb_work.c + lapacke_ssyev.c + lapacke_ssyev_work.c + lapacke_ssyevd.c + lapacke_ssyevd_work.c + lapacke_ssyevr.c + lapacke_ssyevr_work.c + lapacke_ssyevx.c + lapacke_ssyevx_work.c + lapacke_ssygst.c + lapacke_ssygst_work.c + lapacke_ssygv.c + lapacke_ssygv_work.c + lapacke_ssygvd.c + lapacke_ssygvd_work.c + lapacke_ssygvx.c + lapacke_ssygvx_work.c + lapacke_ssyrfs.c + lapacke_ssyrfs_work.c + lapacke_ssysv.c + lapacke_ssysv_rook.c + lapacke_ssysv_rook_work.c + lapacke_ssysv_work.c + lapacke_ssysvx.c + lapacke_ssysvx_work.c + lapacke_ssyswapr.c + lapacke_ssyswapr_work.c + lapacke_ssytrd.c + 
lapacke_ssytrd_work.c + lapacke_ssytrf.c + lapacke_ssytrf_work.c + lapacke_ssytri.c + lapacke_ssytri2.c + lapacke_ssytri2_work.c + lapacke_ssytri2x.c + lapacke_ssytri2x_work.c + lapacke_ssytri_work.c + lapacke_ssytrs.c + lapacke_ssytrs2.c + lapacke_ssytrs2_work.c + lapacke_ssytrs_work.c + lapacke_stbcon.c + lapacke_stbcon_work.c + lapacke_stbrfs.c + lapacke_stbrfs_work.c + lapacke_stbtrs.c + lapacke_stbtrs_work.c + lapacke_stfsm.c + lapacke_stfsm_work.c + lapacke_stftri.c + lapacke_stftri_work.c + lapacke_stfttp.c + lapacke_stfttp_work.c + lapacke_stfttr.c + lapacke_stfttr_work.c + lapacke_stgevc.c + lapacke_stgevc_work.c + lapacke_stgexc.c + lapacke_stgexc_work.c + lapacke_stgsen.c + lapacke_stgsen_work.c + lapacke_stgsja.c + lapacke_stgsja_work.c + lapacke_stgsna.c + lapacke_stgsna_work.c + lapacke_stgsyl.c + lapacke_stgsyl_work.c + lapacke_stpcon.c + lapacke_stpcon_work.c + lapacke_stpmqrt.c + lapacke_stpmqrt_work.c + lapacke_stpqrt2.c + lapacke_stpqrt2_work.c + lapacke_stprfb.c + lapacke_stprfb_work.c + lapacke_stprfs.c + lapacke_stprfs_work.c + lapacke_stptri.c + lapacke_stptri_work.c + lapacke_stptrs.c + lapacke_stptrs_work.c + lapacke_stpttf.c + lapacke_stpttf_work.c + lapacke_stpttr.c + lapacke_stpttr_work.c + lapacke_strcon.c + lapacke_strcon_work.c + lapacke_strevc.c + lapacke_strevc_work.c + lapacke_strexc.c + lapacke_strexc_work.c + lapacke_strrfs.c + lapacke_strrfs_work.c + lapacke_strsen.c + lapacke_strsen_work.c + lapacke_strsna.c + lapacke_strsna_work.c + lapacke_strsyl.c + lapacke_strsyl_work.c + lapacke_strtri.c + lapacke_strtri_work.c + lapacke_strtrs.c + lapacke_strtrs_work.c + lapacke_strttf.c + lapacke_strttf_work.c + lapacke_strttp.c + lapacke_strttp_work.c + lapacke_stzrzf.c + lapacke_stzrzf_work.c +) + +set(ZSRC + lapacke_zbbcsd.c + lapacke_zbbcsd_work.c + lapacke_zbdsqr.c + lapacke_zbdsqr_work.c + lapacke_zcgesv.c + lapacke_zcgesv_work.c + lapacke_zcposv.c + lapacke_zcposv_work.c + lapacke_zgbbrd.c + lapacke_zgbbrd_work.c + 
lapacke_zgbcon.c + lapacke_zgbcon_work.c + lapacke_zgbequ.c + lapacke_zgbequ_work.c + lapacke_zgbequb.c + lapacke_zgbequb_work.c + lapacke_zgbrfs.c + lapacke_zgbrfs_work.c + lapacke_zgbsv.c + lapacke_zgbsv_work.c + lapacke_zgbsvx.c + lapacke_zgbsvx_work.c + lapacke_zgbtrf.c + lapacke_zgbtrf_work.c + lapacke_zgbtrs.c + lapacke_zgbtrs_work.c + lapacke_zgebak.c + lapacke_zgebak_work.c + lapacke_zgebal.c + lapacke_zgebal_work.c + lapacke_zgebrd.c + lapacke_zgebrd_work.c + lapacke_zgecon.c + lapacke_zgecon_work.c + lapacke_zgeequ.c + lapacke_zgeequ_work.c + lapacke_zgeequb.c + lapacke_zgeequb_work.c + lapacke_zgees.c + lapacke_zgees_work.c + lapacke_zgeesx.c + lapacke_zgeesx_work.c + lapacke_zgeev.c + lapacke_zgeev_work.c + lapacke_zgeevx.c + lapacke_zgeevx_work.c + lapacke_zgehrd.c + lapacke_zgehrd_work.c + lapacke_zgelq2.c + lapacke_zgelq2_work.c + lapacke_zgelqf.c + lapacke_zgelqf_work.c + lapacke_zgels.c + lapacke_zgels_work.c + lapacke_zgelsd.c + lapacke_zgelsd_work.c + lapacke_zgelss.c + lapacke_zgelss_work.c + lapacke_zgelsy.c + lapacke_zgelsy_work.c + lapacke_zgemqrt.c + lapacke_zgemqrt_work.c + lapacke_zgeqlf.c + lapacke_zgeqlf_work.c + lapacke_zgeqp3.c + lapacke_zgeqp3_work.c + lapacke_zgeqpf.c + lapacke_zgeqpf_work.c + lapacke_zgeqr2.c + lapacke_zgeqr2_work.c + lapacke_zgeqrf.c + lapacke_zgeqrf_work.c + lapacke_zgeqrfp.c + lapacke_zgeqrfp_work.c + lapacke_zgeqrt.c + lapacke_zgeqrt2.c + lapacke_zgeqrt2_work.c + lapacke_zgeqrt3.c + lapacke_zgeqrt3_work.c + lapacke_zgeqrt_work.c + lapacke_zgerfs.c + lapacke_zgerfs_work.c + lapacke_zgerqf.c + lapacke_zgerqf_work.c + lapacke_zgesdd.c + lapacke_zgesdd_work.c + lapacke_zgesv.c + lapacke_zgesv_work.c + lapacke_zgesvd.c + lapacke_zgesvd_work.c + lapacke_zgesvx.c + lapacke_zgesvx_work.c + lapacke_zgetf2.c + lapacke_zgetf2_work.c + lapacke_zgetrf.c + lapacke_zgetrf_work.c + lapacke_zgetri.c + lapacke_zgetri_work.c + lapacke_zgetrs.c + lapacke_zgetrs_work.c + lapacke_zggbak.c + lapacke_zggbak_work.c + lapacke_zggbal.c + 
lapacke_zggbal_work.c + lapacke_zgges.c + lapacke_zgges_work.c + lapacke_zggesx.c + lapacke_zggesx_work.c + lapacke_zggev.c + lapacke_zggev_work.c + lapacke_zggevx.c + lapacke_zggevx_work.c + lapacke_zggglm.c + lapacke_zggglm_work.c + lapacke_zgghrd.c + lapacke_zgghrd_work.c + lapacke_zgglse.c + lapacke_zgglse_work.c + lapacke_zggqrf.c + lapacke_zggqrf_work.c + lapacke_zggrqf.c + lapacke_zggrqf_work.c + lapacke_zggsvd.c + lapacke_zggsvd_work.c + lapacke_zggsvp.c + lapacke_zggsvp_work.c + lapacke_zgtcon.c + lapacke_zgtcon_work.c + lapacke_zgtrfs.c + lapacke_zgtrfs_work.c + lapacke_zgtsv.c + lapacke_zgtsv_work.c + lapacke_zgtsvx.c + lapacke_zgtsvx_work.c + lapacke_zgttrf.c + lapacke_zgttrf_work.c + lapacke_zgttrs.c + lapacke_zgttrs_work.c + lapacke_zhbev.c + lapacke_zhbev_work.c + lapacke_zhbevd.c + lapacke_zhbevd_work.c + lapacke_zhbevx.c + lapacke_zhbevx_work.c + lapacke_zhbgst.c + lapacke_zhbgst_work.c + lapacke_zhbgv.c + lapacke_zhbgv_work.c + lapacke_zhbgvd.c + lapacke_zhbgvd_work.c + lapacke_zhbgvx.c + lapacke_zhbgvx_work.c + lapacke_zhbtrd.c + lapacke_zhbtrd_work.c + lapacke_zhecon.c + lapacke_zhecon_work.c + lapacke_zheequb.c + lapacke_zheequb_work.c + lapacke_zheev.c + lapacke_zheev_work.c + lapacke_zheevd.c + lapacke_zheevd_work.c + lapacke_zheevr.c + lapacke_zheevr_work.c + lapacke_zheevx.c + lapacke_zheevx_work.c + lapacke_zhegst.c + lapacke_zhegst_work.c + lapacke_zhegv.c + lapacke_zhegv_work.c + lapacke_zhegvd.c + lapacke_zhegvd_work.c + lapacke_zhegvx.c + lapacke_zhegvx_work.c + lapacke_zherfs.c + lapacke_zherfs_work.c + lapacke_zhesv.c + lapacke_zhesv_work.c + lapacke_zhesvx.c + lapacke_zhesvx_work.c + lapacke_zheswapr.c + lapacke_zheswapr_work.c + lapacke_zhetrd.c + lapacke_zhetrd_work.c + lapacke_zhetrf.c + lapacke_zhetrf_work.c + lapacke_zhetri.c + lapacke_zhetri2.c + lapacke_zhetri2_work.c + lapacke_zhetri2x.c + lapacke_zhetri2x_work.c + lapacke_zhetri_work.c + lapacke_zhetrs.c + lapacke_zhetrs2.c + lapacke_zhetrs2_work.c + lapacke_zhetrs_work.c + 
lapacke_zhfrk.c + lapacke_zhfrk_work.c + lapacke_zhgeqz.c + lapacke_zhgeqz_work.c + lapacke_zhpcon.c + lapacke_zhpcon_work.c + lapacke_zhpev.c + lapacke_zhpev_work.c + lapacke_zhpevd.c + lapacke_zhpevd_work.c + lapacke_zhpevx.c + lapacke_zhpevx_work.c + lapacke_zhpgst.c + lapacke_zhpgst_work.c + lapacke_zhpgv.c + lapacke_zhpgv_work.c + lapacke_zhpgvd.c + lapacke_zhpgvd_work.c + lapacke_zhpgvx.c + lapacke_zhpgvx_work.c + lapacke_zhprfs.c + lapacke_zhprfs_work.c + lapacke_zhpsv.c + lapacke_zhpsv_work.c + lapacke_zhpsvx.c + lapacke_zhpsvx_work.c + lapacke_zhptrd.c + lapacke_zhptrd_work.c + lapacke_zhptrf.c + lapacke_zhptrf_work.c + lapacke_zhptri.c + lapacke_zhptri_work.c + lapacke_zhptrs.c + lapacke_zhptrs_work.c + lapacke_zhsein.c + lapacke_zhsein_work.c + lapacke_zhseqr.c + lapacke_zhseqr_work.c + lapacke_zlacgv.c + lapacke_zlacgv_work.c + lapacke_zlacn2.c + lapacke_zlacn2_work.c + lapacke_zlacp2.c + lapacke_zlacp2_work.c + lapacke_zlacpy.c + lapacke_zlacpy_work.c + lapacke_zlag2c.c + lapacke_zlag2c_work.c + lapacke_zlange.c + lapacke_zlange_work.c + lapacke_zlanhe.c + lapacke_zlanhe_work.c + lapacke_zlansy.c + lapacke_zlansy_work.c + lapacke_zlantr.c + lapacke_zlantr_work.c + lapacke_zlapmr.c + lapacke_zlapmr_work.c + lapacke_zlarfb.c + lapacke_zlarfb_work.c + lapacke_zlarfg.c + lapacke_zlarfg_work.c + lapacke_zlarft.c + lapacke_zlarft_work.c + lapacke_zlarfx.c + lapacke_zlarfx_work.c + lapacke_zlarnv.c + lapacke_zlarnv_work.c + lapacke_zlaset.c + lapacke_zlaset_work.c + lapacke_zlaswp.c + lapacke_zlaswp_work.c + lapacke_zlauum.c + lapacke_zlauum_work.c + lapacke_zpbcon.c + lapacke_zpbcon_work.c + lapacke_zpbequ.c + lapacke_zpbequ_work.c + lapacke_zpbrfs.c + lapacke_zpbrfs_work.c + lapacke_zpbstf.c + lapacke_zpbstf_work.c + lapacke_zpbsv.c + lapacke_zpbsv_work.c + lapacke_zpbsvx.c + lapacke_zpbsvx_work.c + lapacke_zpbtrf.c + lapacke_zpbtrf_work.c + lapacke_zpbtrs.c + lapacke_zpbtrs_work.c + lapacke_zpftrf.c + lapacke_zpftrf_work.c + lapacke_zpftri.c + 
lapacke_zpftri_work.c + lapacke_zpftrs.c + lapacke_zpftrs_work.c + lapacke_zpocon.c + lapacke_zpocon_work.c + lapacke_zpoequ.c + lapacke_zpoequ_work.c + lapacke_zpoequb.c + lapacke_zpoequb_work.c + lapacke_zporfs.c + lapacke_zporfs_work.c + lapacke_zposv.c + lapacke_zposv_work.c + lapacke_zposvx.c + lapacke_zposvx_work.c + lapacke_zpotrf.c + lapacke_zpotrf_work.c + lapacke_zpotri.c + lapacke_zpotri_work.c + lapacke_zpotrs.c + lapacke_zpotrs_work.c + lapacke_zppcon.c + lapacke_zppcon_work.c + lapacke_zppequ.c + lapacke_zppequ_work.c + lapacke_zpprfs.c + lapacke_zpprfs_work.c + lapacke_zppsv.c + lapacke_zppsv_work.c + lapacke_zppsvx.c + lapacke_zppsvx_work.c + lapacke_zpptrf.c + lapacke_zpptrf_work.c + lapacke_zpptri.c + lapacke_zpptri_work.c + lapacke_zpptrs.c + lapacke_zpptrs_work.c + lapacke_zpstrf.c + lapacke_zpstrf_work.c + lapacke_zptcon.c + lapacke_zptcon_work.c + lapacke_zpteqr.c + lapacke_zpteqr_work.c + lapacke_zptrfs.c + lapacke_zptrfs_work.c + lapacke_zptsv.c + lapacke_zptsv_work.c + lapacke_zptsvx.c + lapacke_zptsvx_work.c + lapacke_zpttrf.c + lapacke_zpttrf_work.c + lapacke_zpttrs.c + lapacke_zpttrs_work.c + lapacke_zspcon.c + lapacke_zspcon_work.c + lapacke_zsprfs.c + lapacke_zsprfs_work.c + lapacke_zspsv.c + lapacke_zspsv_work.c + lapacke_zspsvx.c + lapacke_zspsvx_work.c + lapacke_zsptrf.c + lapacke_zsptrf_work.c + lapacke_zsptri.c + lapacke_zsptri_work.c + lapacke_zsptrs.c + lapacke_zsptrs_work.c + lapacke_zstedc.c + lapacke_zstedc_work.c + lapacke_zstegr.c + lapacke_zstegr_work.c + lapacke_zstein.c + lapacke_zstein_work.c + lapacke_zstemr.c + lapacke_zstemr_work.c + lapacke_zsteqr.c + lapacke_zsteqr_work.c + lapacke_zsycon.c + lapacke_zsycon_work.c + lapacke_zsyconv.c + lapacke_zsyconv_work.c + lapacke_zsyequb.c + lapacke_zsyequb_work.c + lapacke_zsyrfs.c + lapacke_zsyrfs_work.c + lapacke_zsysv.c + lapacke_zsysv_rook.c + lapacke_zsysv_rook_work.c + lapacke_zsysv_work.c + lapacke_zsysvx.c + lapacke_zsysvx_work.c + lapacke_zsyswapr.c + 
lapacke_zsyswapr_work.c + lapacke_zsytrf.c + lapacke_zsytrf_work.c + lapacke_zsytri.c + lapacke_zsytri2.c + lapacke_zsytri2_work.c + lapacke_zsytri2x.c + lapacke_zsytri2x_work.c + lapacke_zsytri_work.c + lapacke_zsytrs.c + lapacke_zsytrs2.c + lapacke_zsytrs2_work.c + lapacke_zsytrs_work.c + lapacke_ztbcon.c + lapacke_ztbcon_work.c + lapacke_ztbrfs.c + lapacke_ztbrfs_work.c + lapacke_ztbtrs.c + lapacke_ztbtrs_work.c + lapacke_ztfsm.c + lapacke_ztfsm_work.c + lapacke_ztftri.c + lapacke_ztftri_work.c + lapacke_ztfttp.c + lapacke_ztfttp_work.c + lapacke_ztfttr.c + lapacke_ztfttr_work.c + lapacke_ztgevc.c + lapacke_ztgevc_work.c + lapacke_ztgexc.c + lapacke_ztgexc_work.c + lapacke_ztgsen.c + lapacke_ztgsen_work.c + lapacke_ztgsja.c + lapacke_ztgsja_work.c + lapacke_ztgsna.c + lapacke_ztgsna_work.c + lapacke_ztgsyl.c + lapacke_ztgsyl_work.c + lapacke_ztpcon.c + lapacke_ztpcon_work.c + lapacke_ztpmqrt.c + lapacke_ztpmqrt_work.c + lapacke_ztpqrt.c + lapacke_ztpqrt2.c + lapacke_ztpqrt2_work.c + lapacke_ztpqrt_work.c + lapacke_ztprfb.c + lapacke_ztprfb_work.c + lapacke_ztprfs.c + lapacke_ztprfs_work.c + lapacke_ztptri.c + lapacke_ztptri_work.c + lapacke_ztptrs.c + lapacke_ztptrs_work.c + lapacke_ztpttf.c + lapacke_ztpttf_work.c + lapacke_ztpttr.c + lapacke_ztpttr_work.c + lapacke_ztrcon.c + lapacke_ztrcon_work.c + lapacke_ztrevc.c + lapacke_ztrevc_work.c + lapacke_ztrexc.c + lapacke_ztrexc_work.c + lapacke_ztrrfs.c + lapacke_ztrrfs_work.c + lapacke_ztrsen.c + lapacke_ztrsen_work.c + lapacke_ztrsna.c + lapacke_ztrsna_work.c + lapacke_ztrsyl.c + lapacke_ztrsyl_work.c + lapacke_ztrtri.c + lapacke_ztrtri_work.c + lapacke_ztrtrs.c + lapacke_ztrtrs_work.c + lapacke_ztrttf.c + lapacke_ztrttf_work.c + lapacke_ztrttp.c + lapacke_ztrttp_work.c + lapacke_ztzrzf.c + lapacke_ztzrzf_work.c + lapacke_zunbdb.c + lapacke_zunbdb_work.c + lapacke_zuncsd.c + lapacke_zuncsd_work.c + lapacke_zungbr.c + lapacke_zungbr_work.c + lapacke_zunghr.c + lapacke_zunghr_work.c + lapacke_zunglq.c + 
lapacke_zunglq_work.c + lapacke_zungql.c + lapacke_zungql_work.c + lapacke_zungqr.c + lapacke_zungqr_work.c + lapacke_zungrq.c + lapacke_zungrq_work.c + lapacke_zungtr.c + lapacke_zungtr_work.c + lapacke_zunmbr.c + lapacke_zunmbr_work.c + lapacke_zunmhr.c + lapacke_zunmhr_work.c + lapacke_zunmlq.c + lapacke_zunmlq_work.c + lapacke_zunmql.c + lapacke_zunmql_work.c + lapacke_zunmqr.c + lapacke_zunmqr_work.c + lapacke_zunmrq.c + lapacke_zunmrq_work.c + lapacke_zunmrz.c + lapacke_zunmrz_work.c + lapacke_zunmtr.c + lapacke_zunmtr_work.c + lapacke_zupgtr.c + lapacke_zupgtr_work.c + lapacke_zupmtr.c + lapacke_zupmtr_work.c + lapacke_zsyr.c + lapacke_csyr.c + lapacke_zsyr_work.c + lapacke_csyr_work.c + lapacke_ilaver.c +) + +set(SRCX + lapacke_cgbrfsx.c lapacke_cporfsx.c lapacke_dgerfsx.c lapacke_sgbrfsx.c lapacke_ssyrfsx.c lapacke_zherfsx.c + lapacke_cgbrfsx_work.c lapacke_cporfsx_work.c lapacke_dgerfsx_work.c lapacke_sgbrfsx_work.c lapacke_ssyrfsx_work.c lapacke_zherfsx_work.c + lapacke_cgerfsx.c lapacke_csyrfsx.c lapacke_dporfsx.c lapacke_sgerfsx.c lapacke_zgbrfsx.c lapacke_zporfsx.c + lapacke_cgerfsx_work.c lapacke_csyrfsx_work.c lapacke_dporfsx_work.c lapacke_sgerfsx_work.c lapacke_zgbrfsx_work.c lapacke_zporfsx_work.c + lapacke_cherfsx.c lapacke_dgbrfsx.c lapacke_dsyrfsx.c lapacke_sporfsx.c lapacke_zgerfsx.c lapacke_zsyrfsx.c + lapacke_cherfsx_work.c lapacke_dgbrfsx_work.c lapacke_dsyrfsx_work.c lapacke_sporfsx_work.c lapacke_zgerfsx_work.c lapacke_zsyrfsx_work.c + lapacke_cgbsvxx.c lapacke_cposvxx.c lapacke_dgesvxx.c lapacke_sgbsvxx.c lapacke_ssysvxx.c lapacke_zhesvxx.c + lapacke_cgbsvxx_work.c lapacke_cposvxx_work.c lapacke_dgesvxx_work.c lapacke_sgbsvxx_work.c lapacke_ssysvxx_work.c lapacke_zhesvxx_work.c + lapacke_cgesvxx.c lapacke_csysvxx.c lapacke_dposvxx.c lapacke_sgesvxx.c lapacke_zgbsvxx.c lapacke_zposvxx.c + lapacke_cgesvxx_work.c lapacke_csysvxx_work.c lapacke_dposvxx_work.c lapacke_sgesvxx_work.c lapacke_zgbsvxx_work.c lapacke_zposvxx_work.c + 
lapacke_chesvxx.c lapacke_dgbsvxx.c lapacke_dsysvxx.c lapacke_sposvxx.c lapacke_zgesvxx.c lapacke_zsysvxx.c
+  lapacke_chesvxx_work.c lapacke_dgbsvxx_work.c lapacke_dsysvxx_work.c lapacke_sposvxx_work.c lapacke_zgesvxx_work.c lapacke_zsysvxx_work.c
+)
+
+
+# FILE PARTS OF TMGLIB (latms/lagge/laghe/lagsy wrappers); MATGEN is defined here but not appended to LAPACKE_REL_SRC below
+set(MATGEN
+  lapacke_clatms.c
+  lapacke_clatms_work.c
+  lapacke_dlatms.c
+  lapacke_dlatms_work.c
+  lapacke_slatms.c
+  lapacke_slatms_work.c
+  lapacke_zlatms.c
+  lapacke_zlatms_work.c
+  lapacke_clagge.c
+  lapacke_clagge_work.c
+  lapacke_dlagge.c
+  lapacke_dlagge_work.c
+  lapacke_slagge.c
+  lapacke_slagge_work.c
+  lapacke_zlagge.c
+  lapacke_zlagge_work.c
+  lapacke_claghe.c
+  lapacke_claghe_work.c
+  lapacke_zlaghe.c
+  lapacke_zlaghe_work.c
+  lapacke_clagsy.c
+  lapacke_clagsy_work.c
+  lapacke_dlagsy.c
+  lapacke_dlagsy_work.c
+  lapacke_slagsy.c
+  lapacke_slagsy_work.c
+  lapacke_zlagsy.c
+  lapacke_zlagsy_work.c
+)
+
+set(LAPACKE_REL_SRC "") # bare file names, one group appended per enabled precision (SRCX and MATGEN are not added here)
+if (BUILD_SINGLE)
+  list(APPEND LAPACKE_REL_SRC ${SSRC})
+endif ()
+
+if (BUILD_DOUBLE)
+  list(APPEND LAPACKE_REL_SRC ${DSRC})
+endif ()
+
+if (BUILD_COMPLEX)
+  list(APPEND LAPACKE_REL_SRC ${CSRC})
+endif ()
+
+if (BUILD_COMPLEX16)
+  list(APPEND LAPACKE_REL_SRC ${ZSRC})
+endif ()
+
+# prefix every selected file name with the lapack-netlib LAPACKE source directory
+set(LAPACKE_SOURCES "")
+foreach (LAE_FILE ${LAPACKE_REL_SRC})
+  list(APPEND LAPACKE_SOURCES "${NETLIB_LAPACK_DIR}/lapacke/src/${LAE_FILE}")
+endforeach ()
+
+set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/lapacke/include")
+execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${lapacke_include_dir}/lapacke_mangling_with_flags.h" "${lapacke_include_dir}/lapacke_mangling.h") # runs at configure time; the copy is written next to the original header
+include_directories(${lapacke_include_dir})
+set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}") # LAPACK_CFLAGS is not set in this chunk -- presumably defined by an including file; verify
diff --git a/cmake/os.cmake b/cmake/os.cmake
new file mode 100644
index 000000000..f5a75027c
--- /dev/null
+++ b/cmake/os.cmake
@@ -0,0 +1,104 @@
+##
+## Author: Hank Anderson
+## Description: Ported from portion of
OpenBLAS/Makefile.system
+## Detects the OS and sets appropriate variables (MD5SUM, EXTRALIB, NEED_PIC, NO_EXPRECISION, CCOMMON_OPT, FCOMMON_OPT).
+
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
+  set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var; set(ENV{...}) only changes the environment of this CMake run
+  set(MD5SUM "md5 -r")
+endif ()
+
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD")
+  set(MD5SUM "md5 -r")
+endif ()
+
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "NetBSD")
+  set(MD5SUM "md5 -n")
+endif ()
+
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
+  set(EXTRALIB "${EXTRALIB} -lm")
+  set(NO_EXPRECISION 1)
+endif ()
+
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "AIX")
+  set(EXTRALIB "${EXTRALIB} -lm")
+endif ()
+
+# TODO: this is probably meant for mingw, not other windows compilers
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
+
+  set(NEED_PIC 0)
+  set(NO_EXPRECISION 1)
+
+  set(EXTRALIB "${EXTRALIB} -defaultlib:advapi32")
+
+  # probably not going to use these
+  set(SUFFIX "obj")
+  set(PSUFFIX "pobj")
+  set(LIBSUFFIX "a")
+
+  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
+    set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
+  endif ()
+
+  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
+
+    # Test for supporting MS_ABI: ask the compiler for its version (trailing newline stripped)
+    # removed string parsing in favor of CMake's version comparison -hpa
+    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE)
+    if ("${GCC_VERSION}" VERSION_GREATER 4.7 OR "${GCC_VERSION}" VERSION_EQUAL 4.7)
+      # GCC Version >=4.7
+      # It is compatible with MSVC ABI.
+      set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
+    endif ()
+  endif ()
+
+  # Ensure the correct stack alignment on Win32 (quoted so an unset ARCH compares as "" instead of breaking if())
+  # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
+  if ("${ARCH}" STREQUAL "x86")
+    if (NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
+      set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2")
+    endif ()
+    set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2")
+  endif ()
+
+endif ()
+
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Interix")
+  set(NEED_PIC 0)
+  set(NO_EXPRECISION 1)
+
+  set(INTERIX_TOOL_DIR "/opt/gcc.3.3/i586-pc-interix3/bin") # plain path; a stray STREQUAL token here used to become part of the value
+endif ()
+
+if (CYGWIN)
+  set(NEED_PIC 0)
+  set(NO_EXPRECISION 1)
+endif ()
+
+if (NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" AND NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Interix")
+  if (SMP)
+    set(EXTRALIB "${EXTRALIB} -lpthread")
+  endif ()
+endif ()
+
+if (QUAD_PRECISION)
+  set(CCOMMON_OPT "${CCOMMON_OPT} -DQUAD_PRECISION")
+  set(NO_EXPRECISION 1)
+endif ()
+
+if ("${ARCH}" STREQUAL "x86")
+  set(NO_EXPRECISION 1)
+endif ()
+
+if (UTEST_CHECK)
+  set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
+  set(SANITY_CHECK 1)
+endif ()
+
+if (SANITY_CHECK)
+  # TODO: need some way to get $(*F) (target filename); it is make syntax carried over from Makefile.system
+  set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK -DREFNAME=$(*F)f${BU}")
+endif ()
+
diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake
new file mode 100644
index 000000000..c3fa48655
--- /dev/null
+++ b/cmake/prebuild.cmake
@@ -0,0 +1,113 @@
+##
+## Author: Hank Anderson
+## Description: Ported from OpenBLAS/Makefile.prebuild
+## This is triggered by system.cmake and runs before any of the code is built.
+## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files).
+## Next it runs f_check and appends some fortran information to the files.
+## Finally it runs getarch and getarch_2nd for even more environment information.
+ +# CMake vars set by this file: +# CORE +# LIBCORE +# NUM_CORES +# HAVE_MMX +# HAVE_SSE +# HAVE_SSE2 +# HAVE_SSE3 +# MAKE +# SGEMM_UNROLL_M +# SGEMM_UNROLL_N +# DGEMM_UNROLL_M +# DGEMM_UNROLL_N +# QGEMM_UNROLL_M +# QGEMM_UNROLL_N +# CGEMM_UNROLL_M +# CGEMM_UNROLL_N +# ZGEMM_UNROLL_M +# ZGEMM_UNROLL_N +# XGEMM_UNROLL_M +# XGEMM_UNROLL_N +# CGEMM3M_UNROLL_M +# CGEMM3M_UNROLL_N +# ZGEMM3M_UNROLL_M +# ZGEMM3M_UNROLL_N +# XGEMM3M_UNROLL_M +# XGEMM3M_UNROLL_N + +# CPUIDEMU = ../../cpuid/table.o + +if (DEFINED CPUIDEMU) + set(EXFLAGS "-DCPUIDEMU -DVENDOR=99") +endif () + +if (DEFINED TARGET_CORE) + # set the C flags for just this file + set(GETARCH2_FLAGS "-DBUILD_KERNEL") + set(TARGET_MAKE "Makefile_kernel.conf") + set(TARGET_CONF "config_kernel.h") +else() + set(TARGET_MAKE "Makefile.conf") + set(TARGET_CONF "config.h") +endif () + +include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake") + +if (NOT NOFORTRAN) + include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") +endif () + +# compile getarch +set(GETARCH_SRC + ${CMAKE_SOURCE_DIR}/getarch.c + ${CPUIDEMU} +) + +if (NOT MSVC) + list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S) +endif () + +if (MSVC) +#Use generic for MSVC now +set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC) +endif() + +set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") +set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") +file(MAKE_DIRECTORY ${GETARCH_DIR}) +try_compile(GETARCH_RESULT ${GETARCH_DIR} + SOURCES ${GETARCH_SRC} + COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GETARCH_LOG + COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN} +) + +message(STATUS "Running getarch") + +# use the cmake binary w/ the -E param to run a shell command in a cross-platform way +execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT) +execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT) + +message(STATUS "GETARCH
results:\n${GETARCH_MAKE_OUT}") + +# append config data from getarch to the TARGET file and read in CMake vars +file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT}) +ParseGetArchVars(${GETARCH_MAKE_OUT}) + +set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build") +set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}") +file(MAKE_DIRECTORY ${GETARCH2_DIR}) +try_compile(GETARCH2_RESULT ${GETARCH2_DIR} + SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c + COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GETARCH2_LOG + COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} +) + +# use the cmake binary w/ the -E param to run a shell command in a cross-platform way +execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT) +execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT) + +# append config data from getarch_2nd to the TARGET file and read in CMake vars +file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT}) +ParseGetArchVars(${GETARCH2_MAKE_OUT}) + diff --git a/cmake/system.cmake b/cmake/system.cmake new file mode 100644 index 000000000..134e9c12d --- /dev/null +++ b/cmake/system.cmake @@ -0,0 +1,552 @@ +## +## Author: Hank Anderson +## Description: Ported from OpenBLAS/Makefile.system +## + +set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib") + +# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa +# http://stackoverflow.com/questions/714100/os-detecting-makefile + +# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa + +# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1. 
+if (DEFINED TARGET_CORE) + set(TARGET ${TARGET_CORE}) +endif () + +# Force fallbacks for 32bit +if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) + message(STATUS "Compiling a ${BINARY}-bit binary.") + set(NO_AVX 1) + if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE") + set(TARGET "NEHALEM") + endif () + if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER") + set(TARGET "BARCELONA") + endif () +endif () + +if (DEFINED TARGET) + message(STATUS "Targetting the ${TARGET} architecture.") + set(GETARCH_FLAGS "-DFORCE_${TARGET}") +endif () + +if (INTERFACE64) + message(STATUS "Using 64-bit integers.") + set(GETARCH_FLAGS "${GETARCH_FLAGS} -DUSE64BITINT") +endif () + +if (NOT DEFINED GEMM_MULTITHREAD_THRESHOLD) + set(GEMM_MULTITHREAD_THRESHOLD 4) +endif () +message(STATUS "GEMM multithread threshold set to ${GEMM_MULTITHREAD_THRESHOLD}.") +set(GETARCH_FLAGS "${GETARCH_FLAGS} -DGEMM_MULTITHREAD_THRESHOLD=${GEMM_MULTITHREAD_THRESHOLD}") + +if (NO_AVX) + message(STATUS "Disabling Advanced Vector Extensions (AVX).") + set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX") +endif () + +if (NO_AVX2) + message(STATUS "Disabling Advanced Vector Extensions 2 (AVX2).") + set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2") +endif () + +if (CMAKE_BUILD_TYPE STREQUAL Debug) + set(GETARCH_FLAGS "${GETARCH_FLAGS} -g") +endif () + +# TODO: let CMake handle this? -hpa +#if (${QUIET_MAKE}) +# set(MAKE "${MAKE} -s") +#endif() + +if (NOT DEFINED NO_PARALLEL_MAKE) + set(NO_PARALLEL_MAKE 0) +endif () +set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}") + +if (CMAKE_CXX_COMPILER STREQUAL loongcc) + set(GETARCH_FLAGS "${GETARCH_FLAGS} -static") +endif () + +#if don't use Fortran, it will only compile CBLAS. 
+if (ONLY_CBLAS) + set(NO_LAPACK 1) +else () + set(ONLY_CBLAS 0) +endif () + +include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake") + +if (NOT DEFINED NUM_THREADS) + set(NUM_THREADS ${NUM_CORES}) +endif () + +if (${NUM_THREADS} EQUAL 1) + set(USE_THREAD 0) +endif () + +if (DEFINED USE_THREAD) + if (NOT ${USE_THREAD}) + unset(SMP) + else () + set(SMP 1) + endif () +else () + # N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa + if (${NUM_THREADS} EQUAL 1) + unset(SMP) + else () + set(SMP 1) + endif () +endif () + +if (SMP) + message(STATUS "SMP enabled.") +endif () + +if (NOT DEFINED NEED_PIC) + set(NEED_PIC 1) +endif () + +# TODO: I think CMake should be handling all this stuff -hpa +unset(ARFLAGS) +set(CPP "${COMPILER} -E") +set(AR "${CROSS_SUFFIX}ar") +set(AS "${CROSS_SUFFIX}as") +set(LD "${CROSS_SUFFIX}ld") +set(RANLIB "${CROSS_SUFFIX}ranlib") +set(NM "${CROSS_SUFFIX}nm") +set(DLLWRAP "${CROSS_SUFFIX}dllwrap") +set(OBJCOPY "${CROSS_SUFFIX}objcopy") +set(OBJCONV "${CROSS_SUFFIX}objconv") + +# OS dependent settings +include("${CMAKE_SOURCE_DIR}/cmake/os.cmake") + +# Architecture dependent settings +include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake") + +# C Compiler dependent settings +include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake") + +if (NOT NOFORTRAN) + # Fortran Compiler dependent settings + include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake") +endif () + +if (BINARY64) + if (INTERFACE64) + # CCOMMON_OPT += -DUSE64BITINT + endif () +endif () + +if (NEED_PIC) + if (${CMAKE_C_COMPILER} STREQUAL "IBM") + set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC") + endif () + + if ("${F_COMPILER}" STREQUAL "SUN") + set(FCOMMON_OPT "${FCOMMON_OPT} -pic") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC") + endif () +endif () + +if (DYNAMIC_ARCH) + set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH") +endif () + +if (NO_LAPACK) + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACK") + #Disable LAPACK C interface + set(NO_LAPACKE 1)
+endif () + +if (NO_LAPACKE) + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACKE") +endif () + +if (NO_AVX) + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX") +endif () + +if (${ARCH} STREQUAL "x86") + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX") +endif () + +if (NO_AVX2) + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2") +endif () + +if (SMP) + set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER") + + if (${ARCH} STREQUAL "mips64") + if (NOT ${CORE} STREQUAL "LOONGSON3B") + set(USE_SIMPLE_THREADED_LEVEL3 1) + endif () + endif () + + if (USE_OPENMP) + # USE_SIMPLE_THREADED_LEVEL3 = 1 + # NO_AFFINITY = 1 + set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_OPENMP") + endif () + + if (BIGNUMA) + set(CCOMMON_OPT "${CCOMMON_OPT} -DBIGNUMA") + endif () + +endif () + +if (NO_WARMUP) + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_WARMUP") +endif () + +if (CONSISTENT_FPCSR) + set(CCOMMON_OPT "${CCOMMON_OPT} -DCONSISTENT_FPCSR") +endif () + +# Only for development +# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST") +# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST") +# set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_SWITCHING") +# set(USE_PAPI 1) + +if (USE_PAPI) + set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_PAPI") + set(EXTRALIB "${EXTRALIB} -lpapi -lperfctr") +endif () + +if (DYNAMIC_THREADS) + set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_THREADS") +endif () + +set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}") + +if (USE_SIMPLE_THREADED_LEVEL3) + set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3") +endif () + +if (DEFINED LIBNAMESUFFIX) + set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}") +else () + set(LIBPREFIX "libopenblas") +endif () + +if (NOT DEFINED SYMBOLPREFIX) + set(SYMBOLPREFIX "") +endif () + +if (NOT DEFINED SYMBOLSUFFIX) + set(SYMBOLSUFFIX "") +endif () + +set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}") + +# TODO: nead to convert these Makefiles +# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake + +if (${CORE} STREQUAL "PPC440") + set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC") +endif () + +if 
(${CORE} STREQUAL "PPC440FP2") + set(STATIC_ALLOCATION 1) +endif () + +if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + set(NO_AFFINITY 1) +endif () + +if (NOT ${ARCH} STREQUAL "x86_64" AND NOT ${ARCH} STREQUAL "x86" AND NOT ${CORE} STREQUAL "LOONGSON3B") + set(NO_AFFINITY 1) +endif () + +if (NO_AFFINITY) + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AFFINITY") +endif () + +if (FUNCTION_PROFILE) + set(CCOMMON_OPT "${CCOMMON_OPT} -DFUNCTION_PROFILE") +endif () + +if (HUGETLB_ALLOCATION) + set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLB") +endif () + +if (DEFINED HUGETLBFILE_ALLOCATION) + set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=${HUGETLBFILE_ALLOCATION}") +endif () + +if (STATIC_ALLOCATION) + set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_STATIC") +endif () + +if (DEVICEDRIVER_ALLOCATION) + set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_DEVICEDRIVER -DDEVICEDRIVER_NAME=\"/dev/mapper\"") +endif () + +if (MIXED_MEMORY_ALLOCATION) + set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION") +endif () + +if (${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") + set(TAR gtar) + set(PATCH gpatch) + set(GREP ggrep) +else () + set(TAR tar) + set(PATCH patch) + set(GREP grep) +endif () + +if (NOT DEFINED MD5SUM) + set(MD5SUM md5sum) +endif () + +set(AWK awk) + +set(REVISION "-r${OpenBLAS_VERSION}") +set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION}) + +if (DEBUG) + set(COMMON_OPT "${COMMON_OPT} -g") +endif () + +if (NOT DEFINED COMMON_OPT) + set(COMMON_OPT "-O2") +endif () + +#For x86 32-bit +if (DEFINED BINARY AND BINARY EQUAL 32) +if (NOT MSVC) + set(COMMON_OPT "${COMMON_OPT} -m32") +endif() +endif() + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}") +if(NOT MSVC) +set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}") +endif() +# TODO: not sure what PFLAGS is -hpa +set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}") + +set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}") +# TODO:
not sure what FPFLAGS is -hpa +set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}") + +#For LAPACK Fortran codes. +set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}") +set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}") + +#Disable -fopenmp for LAPACK Fortran codes on Windows. +if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel") + foreach (FILTER_FLAG ${FILTER_FLAGS}) + string(REPLACE ${FILTER_FLAG} "" LAPACK_FFLAGS "${LAPACK_FFLAGS}") + string(REPLACE ${FILTER_FLAG} "" LAPACK_FPFLAGS "${LAPACK_FPFLAGS}") + endforeach () +endif () + +if ("${F_COMPILER}" STREQUAL "GFORTRAN") + # lapack-netlib is rife with uninitialized warnings -hpa + set(LAPACK_FFLAGS "${LAPACK_FFLAGS} -Wno-maybe-uninitialized") +endif () + +set(LAPACK_CFLAGS "${CMAKE_C_FLAGS} -DHAVE_LAPACK_CONFIG_H") +if (INTERFACE64) + set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_ILP64") +endif () + +if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DOPENBLAS_OS_WINDOWS") +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE") +endif () + +if (NOT DEFINED SUFFIX) + set(SUFFIX o) +endif () + +if (NOT DEFINED PSUFFIX) + set(PSUFFIX po) +endif () + +if (NOT DEFINED LIBSUFFIX) + set(LIBSUFFIX a) +endif () + +if (DYNAMIC_ARCH) + if (DEFINED SMP) + set(LIBNAME "${LIBPREFIX}p${REVISION}.${LIBSUFFIX}") + set(LIBNAME_P "${LIBPREFIX}p${REVISION}_p.${LIBSUFFIX}") + else () + set(LIBNAME "${LIBPREFIX}${REVISION}.${LIBSUFFIX}") + set(LIBNAME_P "${LIBPREFIX}${REVISION}_p.${LIBSUFFIX}") + endif () +else () + if (DEFINED SMP) + set(LIBNAME "${LIBPREFIX}_${LIBCORE}p${REVISION}.${LIBSUFFIX}") + set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}p${REVISION}_p.${LIBSUFFIX}") + else () + set(LIBNAME "${LIBPREFIX}_${LIBCORE}${REVISION}.${LIBSUFFIX}") + set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}${REVISION}_p.${LIBSUFFIX}") + endif ()
+endif () + + +set(LIBDLLNAME "${LIBPREFIX}.dll") +set(LIBSONAME "${LIBNAME}.${LIBSUFFIX}.so") +set(LIBDYNNAME "${LIBNAME}.${LIBSUFFIX}.dylib") +set(LIBDEFNAME "${LIBNAME}.${LIBSUFFIX}.def") +set(LIBEXPNAME "${LIBNAME}.${LIBSUFFIX}.exp") +set(LIBZIPNAME "${LIBNAME}.${LIBSUFFIX}.zip") + +set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}") +set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}") + + +set(LIB_COMPONENTS BLAS) +if (NOT NO_CBLAS) + set(LIB_COMPONENTS "${LIB_COMPONENTS} CBLAS") +endif () + +if (NOT NO_LAPACK) + set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACK") + if (NOT NO_LAPACKE) + set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE") + endif () +endif () + +if (ONLY_CBLAS) + set(LIB_COMPONENTS CBLAS) +endif () + + +# For GEMM3M +set(USE_GEMM3M 0) + +if (DEFINED ARCH) + if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS") + set(USE_GEMM3M 1) + endif () + + if (${CORE} STREQUAL "generic") + set(USE_GEMM3M 0) + endif () +endif () + + +#export OSNAME +#export ARCH +#export CORE +#export LIBCORE +#export PGCPATH +#export CONFIG +#export CC +#export FC +#export BU +#export FU +#export NEED2UNDERSCORES +#export USE_THREAD +#export NUM_THREADS +#export NUM_CORES +#export SMP +#export MAKEFILE_RULE +#export NEED_PIC +#export BINARY +#export BINARY32 +#export BINARY64 +#export F_COMPILER +#export C_COMPILER +#export USE_OPENMP +#export CROSS +#export CROSS_SUFFIX +#export NOFORTRAN +#export NO_FBLAS +#export EXTRALIB +#export CEXTRALIB +#export FEXTRALIB +#export HAVE_SSE +#export HAVE_SSE2 +#export HAVE_SSE3 +#export HAVE_SSSE3 +#export HAVE_SSE4_1 +#export HAVE_SSE4_2 +#export HAVE_SSE4A +#export HAVE_SSE5 +#export HAVE_AVX +#export HAVE_VFP +#export HAVE_VFPV3 +#export HAVE_VFPV4 +#export HAVE_NEON +#export KERNELDIR +#export FUNCTION_PROFILE +#export TARGET_CORE +# +#export SGEMM_UNROLL_M +#export SGEMM_UNROLL_N +#export DGEMM_UNROLL_M +#export DGEMM_UNROLL_N +#export QGEMM_UNROLL_M +#export QGEMM_UNROLL_N +#export 
CGEMM_UNROLL_M +#export CGEMM_UNROLL_N +#export ZGEMM_UNROLL_M +#export ZGEMM_UNROLL_N +#export XGEMM_UNROLL_M +#export XGEMM_UNROLL_N +#export CGEMM3M_UNROLL_M +#export CGEMM3M_UNROLL_N +#export ZGEMM3M_UNROLL_M +#export ZGEMM3M_UNROLL_N +#export XGEMM3M_UNROLL_M +#export XGEMM3M_UNROLL_N + + +#if (USE_CUDA) +# export CUDADIR +# export CUCC +# export CUFLAGS +# export CULIB +#endif + +#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f +# +#.f.$(SUFFIX): +# $(FC) $(FFLAGS) -c $< -o $(@F) +# +#.f.$(PSUFFIX): +# $(FC) $(FPFLAGS) -pg -c $< -o $(@F) + +# these are not cross-platform +#ifdef BINARY64 +#PATHSCALEPATH = /opt/pathscale/lib/3.1 +#PGIPATH = /opt/pgi/linux86-64/7.1-5/lib +#else +#PATHSCALEPATH = /opt/pathscale/lib/3.1/32 +#PGIPATH = /opt/pgi/linux86/7.1-5/lib +#endif + +#ACMLPATH = /opt/acml/4.3.0 +#ifneq ($(OSNAME), Darwin) +#MKLPATH = /opt/intel/mkl/10.2.2.025/lib +#else +#MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib +#endif +#ATLASPATH = /opt/atlas/3.9.17/opteron +#FLAMEPATH = $(HOME)/flame/lib +#ifneq ($(OSNAME), SunOS) +#SUNPATH = /opt/sunstudio12.1 +#else +#SUNPATH = /opt/SUNWspro +#endif + diff --git a/cmake/utils.cmake b/cmake/utils.cmake new file mode 100644 index 000000000..6e2a98069 --- /dev/null +++ b/cmake/utils.cmake @@ -0,0 +1,346 @@ +# Functions to help with the OpenBLAS build + +# Reads string from getarch into CMake vars. Format of getarch vars is VARNAME=VALUE +function(ParseGetArchVars GETARCH_IN) + string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" GETARCH_RESULT_LIST "${GETARCH_IN}") + foreach (GETARCH_LINE ${GETARCH_RESULT_LIST}) + # split the line into var and value, then assign the value to a CMake var + string(REGEX MATCHALL "[0-9_a-zA-Z]+" SPLIT_VAR "${GETARCH_LINE}") + list(GET SPLIT_VAR 0 VAR_NAME) + list(GET SPLIT_VAR 1 VAR_VALUE) + set(${VAR_NAME} ${VAR_VALUE} PARENT_SCOPE) + endforeach () +endfunction () + +# Reads a Makefile into CMake vars. 
+macro(ParseMakefileVars MAKEFILE_IN) + message(STATUS "Reading vars from ${MAKEFILE_IN}...") + file(STRINGS ${MAKEFILE_IN} makefile_contents) + foreach (makefile_line ${makefile_contents}) + string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}") + if (NOT "${line_match}" STREQUAL "") + set(var_name ${CMAKE_MATCH_1}) + set(var_value ${CMAKE_MATCH_2}) + # check for Makefile variables in the string, e.g. $(TSUFFIX) + string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches ${var_value}) + foreach (make_var ${make_var_matches}) + # strip out Makefile $() markup + string(REGEX REPLACE "\\$\\(([0-9_a-zA-Z]+)\\)" "\\1" make_var ${make_var}) + # now replace the instance of the Makefile variable with the value of the CMake variable (note the double quote) + string(REPLACE "$(${make_var})" "${${make_var}}" var_value ${var_value}) + endforeach () + set(${var_name} ${var_value}) + else () + string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}") + if (NOT "${line_match}" STREQUAL "") + ParseMakefileVars(${KERNELDIR}/${CMAKE_MATCH_1}) + endif () + endif () + endforeach () +endmacro () + +# Returns all combinations of the input list, as a list with colon-separated combinations +# E.g. input of A B C returns A B C A:B A:C B:C +# N.B. The input is meant to be a list, and to past a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")). +# #param absent_codes codes to use when an element is absent from a combination. For example, if you have TRANS;UNIT;UPPER you may want the code to be NNL when nothing is present. 
+# @returns LIST_OUT a list of combinations +# CODES_OUT a list of codes corresponding to each combination, with N meaning the item is not present, and the first letter of the list item meaning it is presen +function(AllCombinations list_in absent_codes_in) + list(LENGTH list_in list_count) + set(num_combos 1) + # subtract 1 since we will iterate from 0 to num_combos + math(EXPR num_combos "(${num_combos} << ${list_count}) - 1") + set(LIST_OUT "") + set(CODES_OUT "") + foreach (c RANGE 0 ${num_combos}) + + set(current_combo "") + set(current_code "") + + # this is a little ridiculous just to iterate through a list w/ indices + math(EXPR last_list_index "${list_count} - 1") + foreach (list_index RANGE 0 ${last_list_index}) + math(EXPR bit "1 << ${list_index}") + math(EXPR combo_has_bit "${c} & ${bit}") + list(GET list_in ${list_index} list_elem) + if (combo_has_bit) + if (current_combo) + set(current_combo "${current_combo}:${list_elem}") + else () + set(current_combo ${list_elem}) + endif () + string(SUBSTRING ${list_elem} 0 1 code_char) + else () + list(GET absent_codes_in ${list_index} code_char) + endif () + set(current_code "${current_code}${code_char}") + endforeach () + + if (current_combo STREQUAL "") + list(APPEND LIST_OUT " ") # Empty set is a valid combination, but CMake isn't appending the empty string for some reason, use a space + else () + list(APPEND LIST_OUT ${current_combo}) + endif () + list(APPEND CODES_OUT ${current_code}) + + endforeach () + + set(LIST_OUT ${LIST_OUT} PARENT_SCOPE) + set(CODES_OUT ${CODES_OUT} PARENT_SCOPE) +endfunction () + +# generates object files for each of the sources, using the BLAS naming scheme to pass the funciton name as a preprocessor definition +# @param sources_in the source files to build from +# @param defines_in (optional) preprocessor definitions that will be applied to all objects +# @param name_in (optional) if this is set this name will be used instead of the filename. 
Use a * to indicate where the float character should go, if no star the character will be prepended. +# e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax" +# @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU) +# @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters) +# @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc) +# @param complex_filename_scheme some routines have separate source files for complex and non-complex float types. +# 0 - compiles for all types +# 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE) +# 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX) +# 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c) +# 4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c) +# STRING - compiles only the given type (e.g. DOUBLE) +function(GenerateNamedObjects sources_in) + + if (DEFINED ARGV1) + set(defines_in ${ARGV1}) + endif () + + if (DEFINED ARGV2 AND NOT "${ARGV2}" STREQUAL "") + set(name_in ${ARGV2}) + # strip off extension for kernel files that pass in the object name. 
+ get_filename_component(name_in ${name_in} NAME_WE) + endif () + + if (DEFINED ARGV3) + set(use_cblas ${ARGV3}) + else () + set(use_cblas false) + endif () + + if (DEFINED ARGV4) + set(replace_last_with ${ARGV4}) + endif () + + if (DEFINED ARGV5) + set(append_with ${ARGV5}) + endif () + + if (DEFINED ARGV6) + set(no_float_type ${ARGV6}) + else () + set(no_float_type false) + endif () + + if (no_float_type) + set(float_list "DUMMY") # still need to loop once + else () + set(float_list "${FLOAT_TYPES}") + endif () + + set(real_only false) + set(complex_only false) + set(mangle_complex_sources false) + if (DEFINED ARGV7 AND NOT "${ARGV7}" STREQUAL "") + if (${ARGV7} EQUAL 1) + set(real_only true) + elseif (${ARGV7} EQUAL 2) + set(complex_only true) + elseif (${ARGV7} EQUAL 3) + set(mangle_complex_sources true) + elseif (${ARGV7} EQUAL 4) + set(mangle_complex_sources true) + set(complex_only true) + elseif (NOT ${ARGV7} EQUAL 0) + set(float_list ${ARGV7}) + endif () + endif () + + if (complex_only) + list(REMOVE_ITEM float_list "SINGLE") + list(REMOVE_ITEM float_list "DOUBLE") + elseif (real_only) + list(REMOVE_ITEM float_list "COMPLEX") + list(REMOVE_ITEM float_list "ZCOMPLEX") + endif () + + set(float_char "") + set(OBJ_LIST_OUT "") + foreach (float_type ${float_list}) + foreach (source_file ${sources_in}) + + if (NOT no_float_type) + string(SUBSTRING ${float_type} 0 1 float_char) + string(TOLOWER ${float_char} float_char) + endif () + + if (NOT name_in) + get_filename_component(source_name ${source_file} NAME_WE) + set(obj_name "${float_char}${source_name}") + else () + # replace * with float_char + if (${name_in} MATCHES "\\*") + string(REPLACE "*" ${float_char} obj_name ${name_in}) + else () + set(obj_name "${float_char}${name_in}") + endif () + endif () + + if (replace_last_with) + string(REGEX REPLACE ".$" ${replace_last_with} obj_name ${obj_name}) + else () + set(obj_name "${obj_name}${append_with}") + endif () + + # now add the object and set the defines + 
set(obj_defines ${defines_in}) + + if (use_cblas) + set(obj_name "cblas_${obj_name}") + list(APPEND obj_defines "CBLAS") + endif () + + list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"") + if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX") + list(APPEND obj_defines "DOUBLE") + endif () + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + list(APPEND obj_defines "COMPLEX") + if (mangle_complex_sources) + # add a z to the filename + get_filename_component(source_name ${source_file} NAME) + get_filename_component(source_dir ${source_file} DIRECTORY) + string(REPLACE ${source_name} "z${source_name}" source_file ${source_file}) + endif () + endif () + + if (VERBOSE_GEN) + message(STATUS "${obj_name}:${source_file}") + message(STATUS "${obj_defines}") + endif () + + # create a copy of the source to avoid duplicate obj filename problem with ar.exe + get_filename_component(source_extension ${source_file} EXT) + set(new_source_file "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${obj_name}${source_extension}") + if (IS_ABSOLUTE ${source_file}) + set(old_source_file ${source_file}) + else () + set(old_source_file "${CMAKE_CURRENT_LIST_DIR}/${source_file}") + endif () + + string(REPLACE ";" "\n#define " define_source "${obj_defines}") + string(REPLACE "=" " " define_source "${define_source}") + file(WRITE ${new_source_file} "#define ${define_source}\n#include \"${old_source_file}\"") + list(APPEND SRC_LIST_OUT ${new_source_file}) + + endforeach () + endforeach () + + list(APPEND OPENBLAS_SRC ${SRC_LIST_OUT}) + set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE) +endfunction () + +# generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in +# @param sources_in the source files to build from +# @param defines_in the preprocessor definitions 
that will be combined to create the object files +# @param all_defines_in (optional) preprocessor definitions that will be applied to all objects +# @param replace_scheme If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k.c with TRANS and UNIT defined will be symm_TU. +# If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU. +# If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU. +# If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects). +# If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel +# @param alternate_name replaces the source name as the object name (define codes are still appended) +# @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc) +# @param complex_filename_scheme see GenerateNamedObjects +function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme) + + set(alternate_name_in "") + if (DEFINED ARGV5) + set(alternate_name_in ${ARGV5}) + endif () + + set(no_float_type false) + if (DEFINED ARGV6) + set(no_float_type ${ARGV6}) + endif () + + set(complex_filename_scheme "") + if (DEFINED ARGV7) + set(complex_filename_scheme ${ARGV7}) + endif () + + AllCombinations("${defines_in}" "${absent_codes_in}") + set(define_combos ${LIST_OUT}) + set(define_codes ${CODES_OUT}) + + list(LENGTH define_combos num_combos) + math(EXPR num_combos "${num_combos} - 1") + + foreach (c RANGE 0 ${num_combos}) + + list(GET define_combos ${c} define_combo) + list(GET define_codes ${c} define_code) + + foreach (source_file ${sources_in}) + + set(alternate_name ${alternate_name_in}) + + # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with + string(REPLACE ":" ";" 
define_combo ${define_combo}) + + # now add the object and set the defines + set(cur_defines ${define_combo}) + if ("${cur_defines}" STREQUAL " ") + set(cur_defines ${all_defines_in}) + else () + list(APPEND cur_defines ${all_defines_in}) + endif () + + set(replace_code "") + set(append_code "") + if (replace_scheme EQUAL 1) + set(replace_code ${define_code}) + else () + if (replace_scheme EQUAL 2) + set(append_code "_${define_code}") + elseif (replace_scheme EQUAL 3) + if ("${alternate_name}" STREQUAL "") + string(REGEX MATCH "[a-zA-Z]\\." last_letter ${source_file}) + else () + string(REGEX MATCH "[a-zA-Z]$" last_letter ${alternate_name}) + endif () + # first extract the last letter + string(SUBSTRING ${last_letter} 0 1 last_letter) # remove period from match + # break the code up into the first letter and the remaining (should only be 2 anyway) + string(SUBSTRING ${define_code} 0 1 define_code_first) + string(SUBSTRING ${define_code} 1 -1 define_code_second) + set(replace_code "${define_code_first}${last_letter}${define_code_second}") + elseif (replace_scheme EQUAL 4) + # insert code before the last underscore and pass that in as the alternate_name + if ("${alternate_name}" STREQUAL "") + get_filename_component(alternate_name ${source_file} NAME_WE) + endif () + set(extra_underscore "") + # check if filename has two underscores, insert another if not (e.g. 
getrs_parallel needs to become getrs_U_parallel not getrsU_parallel) + string(REGEX MATCH "_[a-zA-Z]+_" underscores ${alternate_name}) + string(LENGTH "${underscores}" underscores) + if (underscores EQUAL 0) + set(extra_underscore "_") + endif () + string(REGEX REPLACE "(.+)(_[^_]+)$" "\\1${extra_underscore}${define_code}\\2" alternate_name ${alternate_name}) + else() + set(append_code ${define_code}) # replace_scheme should be 0 + endif () + endif () + + GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" false "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}") + endforeach () + endforeach () + + set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE) +endfunction () + diff --git a/common.h b/common.h index c367e38cb..7b81c6fb6 100644 --- a/common.h +++ b/common.h @@ -82,7 +82,10 @@ extern "C" { #include #include #include + +#if !defined(_MSC_VER) #include +#endif #ifdef OS_LINUX #include @@ -95,6 +98,10 @@ extern "C" { #ifdef OS_ANDROID #define NO_SYSV_IPC +//Android NDK only supports complex.h since Android 5.0 +#if __ANDROID_API__ < 21 +#define FORCE_OPENBLAS_COMPLEX_STRUCT +#endif #endif #ifdef OS_WINDOWS @@ -114,6 +121,7 @@ extern "C" { #include #endif #include +#include #include #include #ifdef SMP @@ -293,13 +301,6 @@ typedef int blasint; #define COMPSIZE 2 #endif -#if defined(C_PGI) || defined(C_SUN) -#define CREAL(X) (*((FLOAT *)&X + 0)) -#define CIMAG(X) (*((FLOAT *)&X + 1)) -#else -#define CREAL __real__ -#define CIMAG __imag__ -#endif #define Address_H(x) (((x)+(1<<15))>>16) #define Address_L(x) ((x)-((Address_H(x))<<16)) @@ -313,8 +314,12 @@ typedef int blasint; #endif #if defined(OS_WINDOWS) +#if defined(_MSC_VER) && !defined(__clang__) +#define YIELDING YieldProcessor() +#else #define YIELDING SwitchToThread() #endif +#endif #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5) #define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n"); @@ -500,18 +505,52 @@ static void 
__inline blas_lock(volatile BLASULONG *address){ /* C99 supports complex floating numbers natively, which GCC also offers as an extension since version 3.0. If neither are available, use a compatible structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ -#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ - (__GNUC__ >= 3 && !defined(__cplusplus))) +#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ + (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) #define OPENBLAS_COMPLEX_C99 + #ifndef __cplusplus + #include + #endif typedef float _Complex openblas_complex_float; typedef double _Complex openblas_complex_double; typedef xdouble _Complex openblas_complex_xdouble; + #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I)) + #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I)) + #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I)) #else #define OPENBLAS_COMPLEX_STRUCT typedef struct { float real, imag; } openblas_complex_float; typedef struct { double real, imag; } openblas_complex_double; typedef struct { xdouble real, imag; } openblas_complex_xdouble; + #define openblas_make_complex_float(real, imag) {(real), (imag)} + #define openblas_make_complex_double(real, imag) {(real), (imag)} + #define openblas_make_complex_xdouble(real, imag) {(real), (imag)} #endif + +#ifdef XDOUBLE +#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble +#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i) +#elif defined(DOUBLE) +#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double +#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i) +#else +#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float +#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i) +#endif + +#if defined(C_PGI) || defined(C_SUN) +#define CREAL(X) (*((FLOAT *)&X + 0)) 
+#define CIMAG(X) (*((FLOAT *)&X + 1)) +#else +#ifdef OPENBLAS_COMPLEX_STRUCT +#define CREAL(Z) ((Z).real) +#define CIMAG(Z) ((Z).imag) +#else +#define CREAL __real__ +#define CIMAG __imag__ +#endif +#endif + #endif // ASSEMBLER #ifndef IFLUSH @@ -528,6 +567,10 @@ static void __inline blas_lock(volatile BLASULONG *address){ #endif #endif +#if defined(C_MSVC) +#define inline __inline +#endif + #ifndef ASSEMBLER #ifndef MIN diff --git a/common_level1.h b/common_level1.h index 2a1b4f1cf..32ffd6f18 100644 --- a/common_level1.h +++ b/common_level1.h @@ -47,12 +47,12 @@ double dsdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG); double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG); xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); -float _Complex cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); -float _Complex cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); -double _Complex zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); -double _Complex zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); -xdouble _Complex xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); -xdouble _Complex xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); +openblas_complex_float cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); +openblas_complex_float cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); +openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); +openblas_complex_double zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); +openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); +openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); diff --git a/common_param.h b/common_param.h index ab40ddeef..36d6149ea 100644 --- a/common_param.h +++ b/common_param.h @@ -830,56 +830,56 @@ BLASLONG 
(*ixamin_k)(BLASLONG, xdouble *, BLASLONG); int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); - int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG); - int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG); - int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG); - int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG); - int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); - int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG); - int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG); - int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG); - int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG); - int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); - int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*cimatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*comatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, 
float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); - int (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); - int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); - int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); - int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); - int (*zimatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); - int (*zomatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); + int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG); + int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG); + int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG); + int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG); + + int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, 
double, double*, BLASLONG); + int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG); + int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG); + int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG); + + int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + + int (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + + int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); diff --git a/common_x86.h b/common_x86.h index 6c90432a2..1ace84cad 100644 --- a/common_x86.h +++ b/common_x86.h @@ -56,11 +56,23 @@ static void __inline blas_lock(volatile BLASULONG *address){ do { while (*address) {YIELDING;}; +#if defined(_MSC_VER) && !defined(__clang__) + // use intrinsic instead of inline assembly + ret = _InterlockedExchange(address, 1); + // inline assembly + /*__asm { + mov eax, address + mov ebx, 1 + xchg [eax], ebx + mov ret, ebx + }*/ +#else __asm__ __volatile__( "xchgl %0, %1\n" : "=r"(ret), "=m"(*address) : "0"(1), 
"m"(*address) : "memory"); +#endif } while (ret); @@ -68,31 +80,43 @@ static void __inline blas_lock(volatile BLASULONG *address){ #define BLAS_LOCK_DEFINED static __inline unsigned long long rpcc(void){ +#if defined(_MSC_VER) && !defined(__clang__) + return __rdtsc(); // use MSVC intrinsic +#else unsigned int a, d; __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); return ((unsigned long long)a + ((unsigned long long)d << 32)); +#endif }; #define RPCC_DEFINED static __inline unsigned long getstackaddr(void){ +#if defined(_MSC_VER) && !defined(__clang__) + return (unsigned long)_ReturnAddress(); // use MSVC intrinsic +#else unsigned long addr; __asm__ __volatile__ ("mov %%esp, %0" : "=r"(addr) : : "memory"); return addr; +#endif }; static __inline long double sqrt_long(long double val) { +#if defined(_MSC_VER) && !defined(__clang__) + return sqrt(val); // not sure if this will use fsqrt +#else long double result; __asm__ __volatile__ ("fldt %1\n" "fsqrt\n" "fstpt %0\n" : "=m" (result) : "m"(val)); return result; +#endif } #define SQRT(a) sqrt_long(a) @@ -102,7 +126,7 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); #define WHEREAMI -static inline int WhereAmI(void){ +static __inline int WhereAmI(void){ int eax, ebx, ecx, edx; int apicid; @@ -148,9 +172,14 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ y = blas_quick_divide_table[y]; +#if defined(_MSC_VER) && !defined(__clang__) + (void*)result; + return x*y; +#else __asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y)); return result; +#endif } #endif @@ -286,8 +315,12 @@ REALNAME: #define PROFCODE +#ifdef __clang__ +#define EPILOGUE .end +#else #define EPILOGUE .end REALNAME #endif +#endif #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) #define PROLOGUE \ diff --git a/common_x86_64.h b/common_x86_64.h index 4c783b315..da9afc0e4 100644 --- a/common_x86_64.h +++ b/common_x86_64.h @@ -41,6 +41,10 @@ #ifndef ASSEMBLER +#ifdef 
C_MSVC +#include +#endif + #ifdef C_SUN #define __asm__ __asm #define __volatile__ @@ -61,32 +65,45 @@ static void __inline blas_lock(volatile BLASULONG *address){ +#ifndef C_MSVC int ret; +#else + BLASULONG ret; +#endif do { while (*address) {YIELDING;}; +#ifndef C_MSVC __asm__ __volatile__( "xchgl %0, %1\n" : "=r"(ret), "=m"(*address) : "0"(1), "m"(*address) : "memory"); - +#else + ret=InterlockedExchange64((volatile LONG64 *)(address), 1); +#endif } while (ret); + } #define BLAS_LOCK_DEFINED static __inline BLASULONG rpcc(void){ +#ifdef C_MSVC + return __rdtsc(); +#else BLASULONG a, d; __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); return ((BLASULONG)a + ((BLASULONG)d << 32)); +#endif } #define RPCC_DEFINED #define RPCC64BIT +#ifndef C_MSVC static __inline BLASULONG getstackaddr(void){ BLASULONG addr; @@ -95,22 +112,32 @@ static __inline BLASULONG getstackaddr(void){ return addr; } +#endif static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ +#ifdef C_MSVC + int cpuinfo[4]; + __cpuid(cpuinfo, op); + *eax=cpuinfo[0]; + *ebx=cpuinfo[1]; + *ecx=cpuinfo[2]; + *edx=cpuinfo[3]; +#else __asm__ __volatile__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op)); +#endif } /* #define WHEREAMI */ -static inline int WhereAmI(void){ +static __inline int WhereAmI(void){ int eax, ebx, ecx, edx; int apicid; @@ -152,10 +179,14 @@ static inline int WhereAmI(void){ #define GET_IMAGE_CANCEL #ifdef SMP -#ifdef USE64BITINT +#if defined(USE64BITINT) static __inline blasint blas_quickdivide(blasint x, blasint y){ return x / y; } +#elif defined (C_MSVC) +static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){ + return x / y; +} #else extern unsigned int blas_quick_divide_table[]; diff --git a/cpuid.h b/cpuid.h index 8a8cdf6dd..e9bd2d016 100644 --- a/cpuid.h +++ b/cpuid.h @@ -39,6 +39,10 @@ #ifndef CPUID_H #define CPUID_H +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) +#define INTEL_AMD 
+#endif + #define VENDOR_INTEL 1 #define VENDOR_UMC 2 #define VENDOR_AMD 3 @@ -59,7 +63,7 @@ #define FAMILY_PM 7 #define FAMILY_IA64 8 -#if defined(__i386__) || defined(__x86_64__) +#ifdef INTEL_AMD #define GET_EXFAMILY 1 #define GET_EXMODEL 2 #define GET_TYPE 3 diff --git a/cpuid_x86.c b/cpuid_x86.c index 135ac7cf9..a65991041 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -40,6 +40,12 @@ #include #include "cpuid.h" +#if defined(_MSC_VER) && !defined(__clang__) +#define C_INLINE __inline +#else +#define C_INLINE inline +#endif + /* #ifdef NO_AVX #define CPUTYPE_HASWELL CPUTYPE_NEHALEM @@ -53,12 +59,26 @@ #endif */ +#if defined(_MSC_VER) && !defined(__clang__) + +void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) +{ + int cpuInfo[4] = {-1}; + __cpuid(cpuInfo, op); + *eax = cpuInfo[0]; + *ebx = cpuInfo[1]; + *ecx = cpuInfo[2]; + *edx = cpuInfo[3]; +} + +#else + #ifndef CPUIDEMU #if defined(__APPLE__) && defined(__i386__) void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); #else -static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ +static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ #if defined(__i386__) && defined(__PIC__) __asm__ __volatile__ ("mov %%ebx, %%edi;" @@ -115,14 +135,16 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int * #endif -static inline int have_cpuid(void){ +#endif // _MSC_VER + +static C_INLINE int have_cpuid(void){ int eax, ebx, ecx, edx; cpuid(0, &eax, &ebx, &ecx, &edx); return eax; } -static inline int have_excpuid(void){ +static C_INLINE int have_excpuid(void){ int eax, ebx, ecx, edx; cpuid(0x80000000, &eax, &ebx, &ecx, &edx); @@ -130,10 +152,14 @@ static inline int have_excpuid(void){ } #ifndef NO_AVX -static inline void xgetbv(int op, int * eax, int * edx){ +static C_INLINE void xgetbv(int op, int * eax, int * edx){ //Use binary code for xgetbv +#if defined(_MSC_VER) && !defined(__clang__) + *eax = __xgetbv(op); +#else __asm__ __volatile__ (".byte 
0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc"); +#endif } #endif diff --git a/ctest/CMakeLists.txt b/ctest/CMakeLists.txt new file mode 100644 index 000000000..dbe785bcb --- /dev/null +++ b/ctest/CMakeLists.txt @@ -0,0 +1,46 @@ +include_directories(${CMAKE_SOURCE_DIR}) + +enable_language(Fortran) + +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DADD${BU} -DCBLAS") + +FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh +"$1 < $2\n" +) + +foreach(float_type ${FLOAT_TYPES}) + string(SUBSTRING ${float_type} 0 1 float_char_upper) + string(TOLOWER ${float_char_upper} float_char) + #level1 + add_executable(x${float_char}cblat1 + c_${float_char}blat1.f + c_${float_char}blas1.c) + target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}_static) + add_test(NAME "x${float_char}cblat1" + COMMAND "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat1") + + #level2 + add_executable(x${float_char}cblat2 + c_${float_char}blat2.f + c_${float_char}blas2.c + c_${float_char}2chke.c + auxiliary.c + c_xerbla.c + constant.c) + target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}_static) + add_test(NAME "x${float_char}cblat2" + COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat2" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in2") + + #level3 + add_executable(x${float_char}cblat3 + c_${float_char}blat3.f + c_${float_char}blas3.c + c_${float_char}3chke.c + auxiliary.c + c_xerbla.c + constant.c) + target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}_static) + add_test(NAME "x${float_char}cblat3" + COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat3" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3") + +endforeach() diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt new file mode 100644 index 000000000..696767486 --- /dev/null +++ b/driver/level2/CMakeLists.txt @@ -0,0 +1,203 @@ + +include_directories(${CMAKE_SOURCE_DIR}) + 
+# sources that need to be compiled twice, once with no flags and once with LOWER +set(UL_SOURCES + sbmv_k.c + spmv_k.c + spr_k.c + spr2_k.c + syr_k.c + syr2_k.c +) + +# sources that need to be compiled several times, for UNIT, TRANSA +set(U_SOURCES + trmv_U.c + tbmv_U.c + tbsv_U.c + tpmv_U.c + tpsv_U.c + trsv_U.c +) + +set(L_SOURCES + trmv_L.c + tbmv_L.c + tbsv_L.c + tpmv_L.c + tpsv_L.c + trsv_L.c +) + +set(UL_SMP_SOURCES + symv_thread.c + syr_thread.c + syr2_thread.c + spr_thread.c + spr2_thread.c + spmv_thread.c + sbmv_thread.c +) + +set(NU_SMP_SOURCES + trmv_thread.c + tpmv_thread.c + tbmv_thread.c +) + +set(ULVM_COMPLEX_SOURCES + hbmv_k.c + hpmv_k.c + hpr_k.c + hpr2_k.c + her_k.c + her2_k.c +) + +# objects that need LOWER set +GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3) + +# gbmv uses a lowercase n and t +GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3) +GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3) +# c/zgbmv +GenerateNamedObjects("zgbmv_k.c" "CONJ" "gbmv_r" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ" "gbmv_c" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "XCONJ" "gbmv_o" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "TRANS;XCONJ" "gbmv_u" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "CONJ;XCONJ" "gbmv_s" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2) + +# special defines for complex +foreach (float_type ${FLOAT_TYPES}) + + if (SMP) + GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false ${float_type}) + GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false ${float_type}) + + GenerateNamedObjects("gbmv_thread.c" "" "gbmv_thread_n" false "" "" false ${float_type}) + GenerateNamedObjects("gbmv_thread.c" "TRANSA" "gbmv_thread_t" false "" "" false ${float_type}) + endif () + + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + + foreach 
(u_source ${U_SOURCES}) + string(REGEX MATCH "[a-z]+" op_name ${u_source}) + GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NU" false ${float_type}) + GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TL" false ${float_type}) + GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RU" false ${float_type}) + GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CL" false ${float_type}) + endforeach () + + foreach (l_source ${L_SOURCES}) + string(REGEX MATCH "[a-z]+" op_name ${l_source}) + GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NL" false ${float_type}) + GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TU" false ${float_type}) + GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RL" false ${float_type}) + GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type}) + endforeach () + + foreach (ulvm_source ${ULVM_COMPLEX_SOURCES}) + string(REGEX MATCH "[a-z0-9]+" op_name ${ulvm_source}) + GenerateNamedObjects("z${ulvm_source}" "" "${op_name}_U" false "" "" false ${float_type}) + GenerateNamedObjects("z${ulvm_source}" "LOWER" "${op_name}_L" false "" "" false ${float_type}) + GenerateNamedObjects("z${ulvm_source}" "HEMVREV" "${op_name}_V" false "" "" false ${float_type}) + GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type}) + endforeach() + + if (SMP) + + GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type}) + GenerateNamedObjects("gemv_thread.c" "CONJ;TRANSA" "gemv_thread_c" false "" "" false ${float_type}) + GenerateNamedObjects("gemv_thread.c" "XCONJ" "gemv_thread_o" false "" "" false ${float_type}) + GenerateNamedObjects("gemv_thread.c" "XCONJ;TRANSA" "gemv_thread_u" false "" "" false ${float_type}) + GenerateNamedObjects("gemv_thread.c" 
"XCONJ;CONJ" "gemv_thread_s" false "" "" false ${float_type}) + GenerateNamedObjects("gemv_thread.c" "XCONJ;CONJ;TRANSA" "gemv_thread_d" false "" "" false ${float_type}) + + GenerateNamedObjects("gbmv_thread.c" "CONJ" "gbmv_thread_r" false "" "" false ${float_type}) + GenerateNamedObjects("gbmv_thread.c" "CONJ;TRANSA" "gbmv_thread_c" false "" "" false ${float_type}) + GenerateNamedObjects("gbmv_thread.c" "XCONJ" "gbmv_thread_o" false "" "" false ${float_type}) + GenerateNamedObjects("gbmv_thread.c" "XCONJ;TRANSA" "gbmv_thread_u" false "" "" false ${float_type}) + GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ" "gbmv_thread_s" false "" "" false ${float_type}) + GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ;TRANSA" "gbmv_thread_d" false "" "" false ${float_type}) + + GenerateNamedObjects("ger_thread.c" "" "ger_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("ger_thread.c" "CONJ" "ger_thread_C" false "" "" false ${float_type}) + GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type}) + + GenerateNamedObjects("sbmv_thread.c" "HEMV" "hbmv_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("sbmv_thread.c" "HEMV;LOWER" "hbmv_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("sbmv_thread.c" "HEMVREV" "hbmv_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("sbmv_thread.c" "LOWER;HEMVREV" "hbmv_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("spmv_thread.c" "HEMV" "hpmv_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("spmv_thread.c" "HEMV;LOWER" "hpmv_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("spmv_thread.c" "HEMVREV" "hpmv_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("spmv_thread.c" "LOWER;HEMVREV" "hpmv_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("spr_thread.c" 
"HEMV" "hpr_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("spr_thread.c" "HEMV;LOWER" "hpr_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("spr_thread.c" "HEMVREV" "hpr_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("spr_thread.c" "LOWER;HEMVREV" "hpr_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("spr2_thread.c" "HEMV" "hpr2_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("spr2_thread.c" "HEMV;LOWER" "hpr2_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("spr2_thread.c" "HEMVREV" "hpr2_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("spr2_thread.c" "LOWER;HEMVREV" "hpr2_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("symv_thread.c" "HEMV" "hemv_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("symv_thread.c" "HEMV;LOWER" "hemv_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("symv_thread.c" "HEMVREV" "hemv_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("symv_thread.c" "LOWER;HEMVREV" "hemv_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("syr_thread.c" "HER" "her_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("syr_thread.c" "HER;LOWER" "her_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("syr_thread.c" "HERREV" "her_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("syr_thread.c" "LOWER;HERREV" "her_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("syr2_thread.c" "HER" "her2_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("syr2_thread.c" "HER;LOWER" "her2_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("syr2_thread.c" "HERREV" "her2_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("syr2_thread.c" "LOWER;HERREV" "her2_thread_M" false "" "" false ${float_type}) + + foreach (nu_smp_src ${NU_SMP_SOURCES}) + string(REGEX MATCH 
"[a-z]+_[a-z]+" op_name ${nu_smp_src}) + GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type}) + GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=2" 0 "${op_name}_T" false ${float_type}) + GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=3" 0 "${op_name}_R" false ${float_type}) + GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=4" 0 "${op_name}_C" false ${float_type}) + endforeach () + endif () + + else () + # For real number functions + foreach (u_source ${U_SOURCES}) + string(REGEX MATCH "[a-z]+" op_name ${u_source}) + GenerateCombinationObjects("${u_source}" "UNIT" "N" "" 0 "${op_name}_NU" false ${float_type}) + GenerateCombinationObjects("${u_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TL" false ${float_type}) + endforeach () + + foreach (l_source ${L_SOURCES}) + string(REGEX MATCH "[a-z]+" op_name ${l_source}) + GenerateCombinationObjects("${l_source}" "UNIT" "N" "" 0 "${op_name}_NL" false ${float_type}) + GenerateCombinationObjects("${l_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TU" false ${float_type}) + endforeach () + + if (SMP) + GenerateNamedObjects("ger_thread.c" "" "" false "" "" false ${float_type}) + foreach(nu_smp_source ${NU_SMP_SOURCES}) + string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_source}) + GenerateCombinationObjects("${nu_smp_source}" "LOWER;UNIT" "U;N" "" 0 "${op_name}_N" false ${float_type}) + GenerateCombinationObjects("${nu_smp_source}" "LOWER;UNIT" "U;N" "TRANSA" 0 "${op_name}_T" false ${float_type}) + endforeach() + endif () + endif () +endforeach () + +if (SMP) + GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2) +endif () + +add_library(driver_level2 OBJECT ${OPENBLAS_SRC}) diff --git a/driver/level2/gbmv_thread.c b/driver/level2/gbmv_thread.c index 9efe17092..ef9d58d76 100644 --- a/driver/level2/gbmv_thread.c +++ b/driver/level2/gbmv_thread.c @@ -64,7 +64,7 @@ static int gbmv_kernel(blas_arg_t 
*args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #endif diff --git a/driver/level2/sbmv_thread.c b/driver/level2/sbmv_thread.c index 5b7fc7332..a0377d638 100644 --- a/driver/level2/sbmv_thread.c +++ b/driver/level2/sbmv_thread.c @@ -60,7 +60,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif a = (FLOAT *)args -> a; diff --git a/driver/level2/spmv_thread.c b/driver/level2/spmv_thread.c index 93a2f44d4..0f47344df 100644 --- a/driver/level2/spmv_thread.c +++ b/driver/level2/spmv_thread.c @@ -60,7 +60,7 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif a = (FLOAT *)args -> a; diff --git a/driver/level2/tbmv_thread.c b/driver/level2/tbmv_thread.c index 3c1249448..bbb1c50eb 100644 --- a/driver/level2/tbmv_thread.c +++ b/driver/level2/tbmv_thread.c @@ -76,7 +76,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #endif diff --git a/driver/level2/tpmv_thread.c b/driver/level2/tpmv_thread.c index 3b91cee45..47dc1daf9 100644 --- a/driver/level2/tpmv_thread.c +++ b/driver/level2/tpmv_thread.c @@ -81,7 +81,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #endif diff --git a/driver/level2/trmv_thread.c b/driver/level2/trmv_thread.c index 29e9799f6..a9dc2dc62 100644 --- a/driver/level2/trmv_thread.c +++ b/driver/level2/trmv_thread.c @@ -87,7 +87,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX 
FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #endif diff --git a/driver/level2/zgbmv_k.c b/driver/level2/zgbmv_k.c index 68d6045bd..d89932e33 100644 --- a/driver/level2/zgbmv_k.c +++ b/driver/level2/zgbmv_k.c @@ -77,7 +77,7 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA FLOAT *bufferY = gemvbuffer; FLOAT *bufferX = gemvbuffer; #ifdef TRANS - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif if (incy != 1) { diff --git a/driver/level2/zhbmv_k.c b/driver/level2/zhbmv_k.c index 70e92e050..33f70d2c5 100644 --- a/driver/level2/zhbmv_k.c +++ b/driver/level2/zhbmv_k.c @@ -56,6 +56,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT *bufferX = sbmvbuffer; FLOAT temp[2]; + OPENBLAS_COMPLEX_FLOAT result; + if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); @@ -93,7 +95,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; if (length > 0) { - FLOAT _Complex result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); + result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -118,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; if (length > 0) { - FLOAT _Complex result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); + result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -143,7 +145,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; if 
(length > 0) { - FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); + result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -168,7 +170,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; if (length > 0) { - FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); + result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); diff --git a/driver/level2/zhpmv_k.c b/driver/level2/zhpmv_k.c index 96bceaaf2..9e7ed7b0e 100644 --- a/driver/level2/zhpmv_k.c +++ b/driver/level2/zhpmv_k.c @@ -51,6 +51,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, FLOAT *bufferX = gemvbuffer; FLOAT temp[2]; + OPENBLAS_COMPLEX_FLOAT result; + if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095); @@ -69,7 +71,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, #ifndef HEMVREV #ifndef LOWER if (i > 0) { - FLOAT _Complex result = DOTC_K(i, a, 1, X, 1); + result = DOTC_K(i, a, 1, X, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -93,7 +95,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, #else if (m - i > 1) { - FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); + result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -118,7 +120,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, #else #ifndef 
LOWER if (i > 0) { - FLOAT _Complex result = DOTU_K(i, a, 1, X, 1); + result = DOTU_K(i, a, 1, X, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -142,7 +144,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, #else if (m - i > 1) { - FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); + result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); diff --git a/driver/level2/zsbmv_k.c b/driver/level2/zsbmv_k.c index 30e2f91c3..3ae74ce80 100644 --- a/driver/level2/zsbmv_k.c +++ b/driver/level2/zsbmv_k.c @@ -55,6 +55,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT *bufferY = sbmvbuffer; FLOAT *bufferX = sbmvbuffer; + OPENBLAS_COMPLEX_FLOAT result; + if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); @@ -83,7 +85,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0); if (length > 0) { - FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); + result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -100,7 +102,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, a, 1, Y + i * COMPSIZE, 1, NULL, 0); if (length > 0) { - FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); + result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); diff --git 
a/driver/level2/zspmv_k.c b/driver/level2/zspmv_k.c index 76657eab9..432205e83 100644 --- a/driver/level2/zspmv_k.c +++ b/driver/level2/zspmv_k.c @@ -49,7 +49,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, FLOAT *gemvbuffer = (FLOAT *)buffer; FLOAT *bufferY = gemvbuffer; FLOAT *bufferX = gemvbuffer; - FLOAT _Complex result; + + OPENBLAS_COMPLEX_FLOAT result; if (incy != 1) { Y = bufferY; diff --git a/driver/level2/ztbmv_L.c b/driver/level2/ztbmv_L.c index 74ff0bce1..1ac1cdef1 100644 --- a/driver/level2/ztbmv_L.c +++ b/driver/level2/ztbmv_L.c @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc FLOAT *B = b; BLASLONG length; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztbmv_U.c b/driver/level2/ztbmv_U.c index 933275de3..9aa203396 100644 --- a/driver/level2/ztbmv_U.c +++ b/driver/level2/ztbmv_U.c @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc FLOAT *B = b; BLASLONG length; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztbsv_L.c b/driver/level2/ztbsv_L.c index 0726bbd16..9aa701841 100644 --- a/driver/level2/ztbsv_L.c +++ b/driver/level2/ztbsv_L.c @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc FLOAT *B = b; BLASLONG length; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level2/ztbsv_U.c b/driver/level2/ztbsv_U.c index d022650bc..3722b1f71 100644 --- a/driver/level2/ztbsv_U.c +++ b/driver/level2/ztbsv_U.c @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc FLOAT *B = b; BLASLONG length; #if (TRANSA 
== 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level2/ztpmv_L.c b/driver/level2/ztpmv_L.c index 12c254c12..47e6df56c 100644 --- a/driver/level2/ztpmv_L.c +++ b/driver/level2/ztpmv_L.c @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ BLASLONG i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztpmv_U.c b/driver/level2/ztpmv_U.c index 59708b8b8..da911fb4e 100644 --- a/driver/level2/ztpmv_U.c +++ b/driver/level2/ztpmv_U.c @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ BLASLONG i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztpsv_L.c b/driver/level2/ztpsv_L.c index 3b8e562ce..a497e42a4 100644 --- a/driver/level2/ztpsv_L.c +++ b/driver/level2/ztpsv_L.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ BLASLONG i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level2/ztpsv_U.c b/driver/level2/ztpsv_U.c index 601ac2f9d..28b824e3a 100644 --- a/driver/level2/ztpsv_U.c +++ b/driver/level2/ztpsv_U.c @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ BLASLONG i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level2/ztrmv_L.c b/driver/level2/ztrmv_L.c index 63522cf81..92c86aec2 100644 --- a/driver/level2/ztrmv_L.c +++ b/driver/level2/ztrmv_L.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG 
incb, FLOAT *bu BLASLONG i, is, min_i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztrmv_U.c b/driver/level2/ztrmv_U.c index 8a4494fd7..f9671c9d6 100644 --- a/driver/level2/ztrmv_U.c +++ b/driver/level2/ztrmv_U.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu BLASLONG i, is, min_i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztrsv_L.c b/driver/level2/ztrsv_L.c index 90f1c2c7d..dd3b2786e 100644 --- a/driver/level2/ztrsv_L.c +++ b/driver/level2/ztrsv_L.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf BLASLONG i, is, min_i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level2/ztrsv_U.c b/driver/level2/ztrsv_U.c index bec8114f3..8803182a8 100644 --- a/driver/level2/ztrsv_U.c +++ b/driver/level2/ztrsv_U.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf BLASLONG i, is, min_i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt new file mode 100644 index 000000000..41d440f7a --- /dev/null +++ b/driver/level3/CMakeLists.txt @@ -0,0 +1,115 @@ +include_directories(${CMAKE_SOURCE_DIR}) + +# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. 
-hpa + +# loop through gemm.c defines +set(GEMM_DEFINES NN NT TN TT) +set(GEMM_COMPLEX_DEFINES RN CN RT CT NR TR RR CR NC TC RC CC) +foreach (GEMM_DEFINE ${GEMM_DEFINES}) + string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) + GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0) + if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) + GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0) + endif () +endforeach () + + +set(TRMM_TRSM_SOURCES + trmm_L.c + trmm_R.c + trsm_L.c + trsm_R.c) + +foreach(trmm_trsm_source ${TRMM_TRSM_SOURCES}) + string(REGEX MATCH "[a-z]+_[A-Z]+" op_name ${trmm_trsm_source}) + GenerateCombinationObjects("${trmm_trsm_source}" "UPPER;UNIT" "L;N" "" 0 "${op_name}N") + GenerateCombinationObjects("${trmm_trsm_source}" "UPPER;UNIT" "L;N" "TRANSA" 0 "${op_name}T") +endforeach() + +GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "NN" 1) +GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "" 1) +GenerateCombinationObjects("syr2k_k.c" "LOWER;TRANS" "U;N" "" 1) +GenerateCombinationObjects("syrk_kernel.c" "LOWER" "U" "" 2) +GenerateCombinationObjects("syr2k_kernel.c" "LOWER" "U" "" 2) +if (SMP) + + # N.B. these do NOT have a float type (e.g. DOUBLE) defined! 
+ GenerateNamedObjects("gemm_thread_m.c;gemm_thread_n.c;gemm_thread_mn.c;gemm_thread_variable.c;syrk_thread.c" "" "" 0 "" "" 1) + + if (NOT USE_SIMPLE_THREADED_LEVEL3) + GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "THREADED_LEVEL3" 2 "syrk_thread") + GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "THREADED_LEVEL3;NN" 2 "symm_thread") + endif () +endif () + +foreach (float_type ${FLOAT_TYPES}) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + GenerateCombinationObjects("zherk_kernel.c" "LOWER;CONJ" "U;N" "HERK" 2 "herk_kernel" false ${float_type}) + # TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination + GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type}) + GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type}) + GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type}) + GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type}) + # Need to set CONJ for trmm and trsm + GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type}) + GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type}) + GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_RR" false ${float_type}) + GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_RC" false ${float_type}) + GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_LR" false ${float_type}) + GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trsm_LC" false ${float_type}) + GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type}) + GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trsm_RC" false ${float_type}) + + 
#hemm + GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN" 0 "hemm_L" false ${float_type}) + GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE" 0 "hemm_R" false ${float_type}) + + #her2k + GenerateCombinationObjects("zher2k_kernel.c" "LOWER;CONJ" "U;N" "" 2 "her2k_kernel" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) + + if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) + #hemm + GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type}) + GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type}) + #her2k + GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) + endif() + + # special gemm defines for complex + foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) + string(TOLOWER ${gemm_define} gemm_define_LC) + GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type}) + if(USE_GEMM3M) + GenerateNamedObjects("gemm3m.c" "${gemm_define}" "gemm3m_${gemm_define_LC}" false "" "" false ${float_type}) + endif() + if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) + GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type}) + if(USE_GEMM3M) + GenerateNamedObjects("gemm3m.c" 
"${gemm_define};THREADED_LEVEL3" "gemm3m_thread_${gemm_define_LC}" false "" "" false ${float_type}) + endif() + endif () + endforeach () + endif () +endforeach () + +#HPLOBJS = +# dgemm_nn.c dgemm_nt.c dgemm_tn.c dgemm_tt.c +# dtrsm_LNUU.c dtrsm_LNUN.c dtrsm_LNLU.c dtrsm_LNLN.c +# dtrsm_LTUU.c dtrsm_LTUN.c dtrsm_LTLU.c dtrsm_LTLN.c +# dtrsm_RNUU.c dtrsm_RNUN.c dtrsm_RNLU.c dtrsm_RNLN.c +# dtrsm_RTUU.c dtrsm_RTUN.c dtrsm_RTLU.c dtrsm_RTLN.c +# +#if (USE_SIMPLE_THREADED_LEVEL3) +# HPLOBJS += dgemm_thread_nn.c dgemm_thread_nt.c +# dgemm_thread_tn.c dgemm_thread_tt.c +#endif +# + +add_library(driver_level3 OBJECT ${OPENBLAS_SRC}) diff --git a/driver/level3/syr2k_k.c b/driver/level3/syr2k_k.c index 8df0f122f..09131fbdb 100644 --- a/driver/level3/syr2k_k.c +++ b/driver/level3/syr2k_k.c @@ -47,7 +47,7 @@ #endif #endif -static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { +static __inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { BLASLONG i; diff --git a/driver/level3/syrk_k.c b/driver/level3/syrk_k.c index 08751dc8b..8bc817f87 100644 --- a/driver/level3/syrk_k.c +++ b/driver/level3/syrk_k.c @@ -49,7 +49,7 @@ #endif #endif -static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { +static __inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { BLASLONG i; diff --git a/driver/level3/trmm_R.c b/driver/level3/trmm_R.c index bdd9370cd..0882aa496 100644 --- a/driver/level3/trmm_R.c +++ b/driver/level3/trmm_R.c @@ -70,6 +70,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG ls, is, js; BLASLONG min_l, min_i, min_j; BLASLONG jjs, min_jj; +#if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA))) + BLASLONG start_ls; 
+#endif m = args -> m; n = args -> n; @@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO } #else - BLASLONG start_ls; - for(js = n; js > 0; js -= GEMM_R){ min_j = js; if (min_j > GEMM_R) min_j = GEMM_R; diff --git a/driver/level3/trsm_L.c b/driver/level3/trsm_L.c index 78da0eb6c..d8130ee7e 100644 --- a/driver/level3/trsm_L.c +++ b/driver/level3/trsm_L.c @@ -76,6 +76,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG ls, is, js; BLASLONG min_l, min_i, min_j; BLASLONG jjs, min_jj; +#if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA))) + BLASLONG start_is; +#endif m = args -> m; n = args -> n; @@ -178,8 +181,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO } } #else - BLASLONG start_is; - for(ls = m; ls > 0; ls -= GEMM_Q){ min_l = ls; if (min_l > GEMM_Q) min_l = GEMM_Q; diff --git a/driver/level3/trsm_R.c b/driver/level3/trsm_R.c index 169441d1e..f6a57f93f 100644 --- a/driver/level3/trsm_R.c +++ b/driver/level3/trsm_R.c @@ -75,6 +75,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG ls, is, js; BLASLONG min_l, min_i, min_j; BLASLONG jjs, min_jj; +#if !((defined(UPPER) && !defined(TRANSA)) || (!defined(UPPER) && defined(TRANSA))) + BLASLONG start_ls; +#endif m = args -> m; n = args -> n; @@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO } #else - BLASLONG start_ls; - for(js = n; js > 0; js -= GEMM_R){ min_j = js; if (min_j > GEMM_R) min_j = GEMM_R; diff --git a/driver/others/CMakeLists.txt b/driver/others/CMakeLists.txt new file mode 100644 index 000000000..b2af55e36 --- /dev/null +++ b/driver/others/CMakeLists.txt @@ -0,0 +1,75 @@ +include_directories(${CMAKE_SOURCE_DIR}) + +if (${CORE} STREQUAL "PPC440") + set(MEMORY memory_qalloc.c) +else () + set(MEMORY memory.c) +endif () + +if (SMP) + + if (USE_OPENMP) + 
set(BLAS_SERVER blas_server_omp.c) + elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + set(BLAS_SERVER blas_server_win32.c) + endif () + + if (NOT DEFINED BLAS_SERVER) + set(BLAS_SERVER blas_server.c) + endif () + + set(SMP_SOURCES + ${BLAS_SERVER} + divtable.c # TODO: Makefile has -UDOUBLE + blas_l1_thread.c + ) + + if (NOT NO_AFFINITY) + list(APPEND SMP_SOURCES init.c) + endif () +endif () + +set(COMMON_SOURCES + xerbla.c + openblas_set_num_threads.c + openblas_error_handle.c + openblas_get_num_procs.c + openblas_get_num_threads.c +) + +# these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling +GenerateNamedObjects("abs.c" "" "c_abs" 0 "" "" 1 ) +GenerateNamedObjects("abs.c" "DOUBLE" "z_abs" 0 "" "" 1) +GenerateNamedObjects("openblas_get_config.c;openblas_get_parallel.c" "" "" 0 "" "" 1) + +if (DYNAMIC_ARCH) + list(APPEND COMMON_SOURCES dynamic.c) +else () + list(APPEND COMMON_SOURCES parameter.c) +endif () + +#ifdef EXPRECISION +#COMMONOBJS += x_abs.$(SUFFIX) qlamch.$(SUFFIX) qlamc3.$(SUFFIX) +#endif +# +#ifdef QUAD_PRECISION +#COMMONOBJS += addx.$(SUFFIX) mulx.$(SUFFIX) +#endif +# +#ifdef USE_CUDA +#COMMONOBJS += cuda_init.$(SUFFIX) +#endif +# +#ifdef FUNCTION_PROFILE +#COMMONOBJS += profile.$(SUFFIX) +#endif + +#LIBOTHERS = libothers.$(LIBSUFFIX) + +#ifeq ($(DYNAMIC_ARCH), 1) +#HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) +#else +#HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) +#endif + +add_library(driver_others OBJECT ${OPENBLAS_SRC} ${MEMORY} ${SMP_SOURCES} ${COMMON_SOURCES}) diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index 1fd848c6b..e1c644a80 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -70,9 +70,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*********************************************************************/ #include "common.h" -#ifdef OS_LINUX +#if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) #include +#include #include +#include #endif #ifndef likely @@ -265,7 +267,7 @@ int get_node(void); static int increased_threads = 0; -static int blas_thread_server(void *arg){ +static void* blas_thread_server(void *arg){ /* Thread identifier */ BLASLONG cpu = (BLASLONG)arg; @@ -458,7 +460,7 @@ static int blas_thread_server(void *arg){ //pthread_exit(NULL); - return 0; + return NULL; } #ifdef MONITOR @@ -565,14 +567,23 @@ int blas_thread_init(void){ #ifdef NEED_STACKATTR ret=pthread_create(&blas_threads[i], &attr, - (void *)&blas_thread_server, (void *)i); + &blas_thread_server, (void *)i); #else ret=pthread_create(&blas_threads[i], NULL, - (void *)&blas_thread_server, (void *)i); + &blas_thread_server, (void *)i); #endif if(ret!=0){ - fprintf(STDERR,"OpenBLAS: pthread_creat error in blas_thread_init function. 
Error code:%d\n",ret); - exit(1); + struct rlimit rlim; + const char *msg = strerror(ret); + fprintf(STDERR, "OpenBLAS blas_thread_init: pthread_create: %s\n", msg); + if(0 == getrlimit(RLIMIT_NPROC, &rlim)) { + fprintf(STDERR, "OpenBLAS blas_thread_init: RLIMIT_NPROC " + "%ld current, %ld max\n", (long)(rlim.rlim_cur), (long)(rlim.rlim_max)); + } + if(0 != raise(SIGINT)) { + fprintf(STDERR, "OpenBLAS blas_thread_init: calling exit(3)\n"); + exit(EXIT_FAILURE); + } } } @@ -832,10 +843,10 @@ void goto_set_num_threads(int num_threads) { #ifdef NEED_STACKATTR pthread_create(&blas_threads[i], &attr, - (void *)&blas_thread_server, (void *)i); + &blas_thread_server, (void *)i); #else pthread_create(&blas_threads[i], NULL, - (void *)&blas_thread_server, (void *)i); + &blas_thread_server, (void *)i); #endif } diff --git a/driver/others/memory.c b/driver/others/memory.c index 49c57f911..ba3dc8a23 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -139,8 +139,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define BITMASK(a, b, c) ((((a) >> (b)) & (c))) +#if defined(_MSC_VER) && !defined(__clang__) +#define CONSTRUCTOR __cdecl +#define DESTRUCTOR __cdecl +#elif defined(OS_DARWIN) && defined(C_GCC) #define CONSTRUCTOR __attribute__ ((constructor)) #define DESTRUCTOR __attribute__ ((destructor)) +#else +#define CONSTRUCTOR __attribute__ ((constructor(101))) +#define DESTRUCTOR __attribute__ ((destructor(101))) +#endif #ifdef DYNAMIC_ARCH gotoblas_t *gotoblas = NULL; @@ -795,12 +803,12 @@ static void *alloc_hugetlb(void *address){ if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) { CloseHandle(hToken); - return -1; + return (void*)-1; } if (AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL) != TRUE) { CloseHandle(hToken); - return -1; + return (void*)-1; } map_address = (void *)VirtualAlloc(address, @@ -1402,6 +1410,28 @@ void DESTRUCTOR gotoblas_quit(void) { #endif } +#if defined(_MSC_VER) && !defined(__clang__) +BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) +{ + switch (ul_reason_for_call) + { + case DLL_PROCESS_ATTACH: + gotoblas_init(); + break; + case DLL_THREAD_ATTACH: + break; + case DLL_THREAD_DETACH: + break; + case DLL_PROCESS_DETACH: + gotoblas_quit(); + break; + default: + break; + } + return TRUE; +} +#endif + #if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64)) /* Don't call me; this is just work around for PGI / Sun bug */ void gotoblas_dummy_for_PGI(void) { diff --git a/getarch.c b/getarch.c index 89e736a31..0a49fd1b3 100644 --- a/getarch.c +++ b/getarch.c @@ -69,10 +69,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* or implied, of The University of Texas at Austin. 
*/ /*********************************************************************/ -#if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) +#if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64) #define OS_WINDOWS #endif +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) +#define INTEL_AMD +#endif + #include #include #ifdef OS_WINDOWS @@ -750,7 +754,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHITECTURE "ARM" #define SUBARCHITECTURE "CORTEXA9" #define SUBDIRNAME "arm" -#define ARCHCONFIG "-DCORTEXA9 " \ +#define ARCHCONFIG "-DCORTEXA9 -DARMV7 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ @@ -765,7 +769,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHITECTURE "ARM" #define SUBARCHITECTURE "CORTEXA15" #define SUBDIRNAME "arm" -#define ARCHCONFIG "-DCORTEXA15 " \ +#define ARCHCONFIG "-DCORTEXA15 -DARMV7 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ @@ -830,7 +834,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define OPENBLAS_SUPPORTED #endif -#if defined(__i386__) || (__x86_64__) +#ifdef INTEL_AMD #include "cpuid_x86.c" #define OPENBLAS_SUPPORTED #endif @@ -925,7 +929,7 @@ int main(int argc, char *argv[]){ #ifdef FORCE printf("CORE=%s\n", CORENAME); #else -#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) +#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) printf("CORE=%s\n", get_corename()); #endif #endif @@ -945,7 +949,7 @@ int main(int argc, char *argv[]){ #endif -#if defined(__i386__) || defined(__x86_64__) +#ifdef INTEL_AMD #ifndef FORCE get_sse(); #else @@ -1025,7 +1029,7 @@ int main(int argc, char *argv[]){ #ifdef FORCE printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); #else -#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) +#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); #endif #endif diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt new file mode 100644 index 000000000..9ff924e5f --- /dev/null +++ b/interface/CMakeLists.txt @@ -0,0 +1,166 @@ + +include_directories(${CMAKE_SOURCE_DIR}) + + +set(BLAS1_SOURCES + copy.c + nrm2.c +) + +set(BLAS1_REAL_ONLY_SOURCES + rotm.c rotmg.c # N.B. 
these do not have complex counterparts + rot.c + asum.c +) + +# these will have 'z' prepended for the complex version +set(BLAS1_MANGLED_SOURCES + axpy.c swap.c + scal.c + dot.c + rotg.c + axpby.c +) + +# TODO: USE_NETLIB_GEMV should switch gemv.c to netlib/*gemv.f +# these all have 'z' sources for complex versions +set(BLAS2_SOURCES + gemv.c ger.c + trsv.c trmv.c symv.c + syr.c syr2.c gbmv.c + sbmv.c spmv.c + spr.c spr2.c + tbsv.c tbmv.c + tpsv.c tpmv.c +) + +set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES + hemv.c hbmv.c + her.c her2.c + hpmv.c hpr.c + hpr2.c +) + +# these do not have separate 'z' sources +set(BLAS3_SOURCES + gemm.c symm.c + trsm.c syrk.c syr2k.c +) + +set(BLAS3_MANGLED_SOURCES + omatcopy.c imatcopy.c + geadd.c +) + +# generate the BLAS objs once with and once without cblas +set (CBLAS_FLAGS "") + +if (NOT DEFINED NO_FBLAS) + list(APPEND CBLAS_FLAGS 0) +endif () + +if (NOT DEFINED NO_CBLAS) + list(APPEND CBLAS_FLAGS 1) +endif () + +foreach (CBLAS_FLAG ${CBLAS_FLAGS}) + + # TODO: don't compile complex sources with cblas for now, the naming schemes are all different and they will have to be handled separately from SINGLE/DOUBLE + set(DISABLE_COMPLEX 0) + set(MANGLE_COMPLEX 3) + if (CBLAS_FLAG EQUAL 1) +# set(DISABLE_COMPLEX 1) +# set(MANGLE_COMPLEX 1) + endif () + GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) + GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1) + GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) + GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) + GenerateNamedObjects("${BLAS2_COMPLEX_ONLY_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 4) + GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) + GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) + + #sdsdot, dsdot + 
GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE") + GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE") + + # trmm is trsm with a compiler flag set + GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) + + # max and imax are compiled 4 times + GenerateNamedObjects("max.c" "" "" ${CBLAS_FLAG}) + GenerateNamedObjects("max.c" "USE_ABS" "amax" ${CBLAS_FLAG}) + GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "amin" ${CBLAS_FLAG}) + GenerateNamedObjects("max.c" "USE_MIN" "min" ${CBLAS_FLAG}) + + GenerateNamedObjects("imax.c" "" "i*max" ${CBLAS_FLAG}) + GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" ${CBLAS_FLAG}) + GenerateNamedObjects("imax.c" "USE_ABS;USE_MIN" "i*amin" ${CBLAS_FLAG}) + GenerateNamedObjects("imax.c" "USE_MIN" "i*min" ${CBLAS_FLAG}) + + +# complex-specific sources +foreach (float_type ${FLOAT_TYPES}) + + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + GenerateNamedObjects("zger.c" "" "geru" ${CBLAS_FLAG} "" "" false ${float_type}) + GenerateNamedObjects("zger.c" "CONJ" "gerc" ${CBLAS_FLAG} "" "" false ${float_type}) + GenerateNamedObjects("zdot.c" "CONJ" "dotc" ${CBLAS_FLAG} "" "" false ${float_type}) + GenerateNamedObjects("zdot.c" "" "dotu" ${CBLAS_FLAG} "" "" false ${float_type}) + + GenerateNamedObjects("symm.c" "HEMM" "hemm" ${CBLAS_FLAG} "" "" false ${float_type}) + GenerateNamedObjects("syrk.c" "HEMM" "herk" ${CBLAS_FLAG} "" "" false ${float_type}) + GenerateNamedObjects("syr2k.c" "HEMM" "her2k" ${CBLAS_FLAG} "" "" false ${float_type}) + + if (USE_GEMM3M) + GenerateNamedObjects("gemm.c" "GEMM3M" "gemm3m" false "" "" false ${float_type}) + endif() + endif () + if (${float_type} STREQUAL "COMPLEX") + GenerateNamedObjects("zscal.c" "SSCAL" "sscal" ${CBLAS_FLAG} "" "" false "COMPLEX") + GenerateNamedObjects("nrm2.c" "" "scnrm2" ${CBLAS_FLAG} "" "" true "COMPLEX") + GenerateNamedObjects("zrot.c" "" "csrot" ${CBLAS_FLAG} "" "" true "COMPLEX") + 
GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" ${CBLAS_FLAG} "" "" true "COMPLEX") + GenerateNamedObjects("max.c" "USE_ABS" "scamax" ${CBLAS_FLAG} "" "" true "COMPLEX") + GenerateNamedObjects("asum.c" "" "scasum" ${CBLAS_FLAG} "" "" true "COMPLEX") + endif () + if (${float_type} STREQUAL "ZCOMPLEX") + GenerateNamedObjects("zscal.c" "SSCAL" "dscal" ${CBLAS_FLAG} "" "" false "ZCOMPLEX") + GenerateNamedObjects("nrm2.c" "" "dznrm2" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") + GenerateNamedObjects("zrot.c" "" "zdrot" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") + GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") + GenerateNamedObjects("max.c" "USE_ABS" "dzamax" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") + GenerateNamedObjects("asum.c" "" "dzasum" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") + endif () +endforeach () + +endforeach () + +#Special functions for CBLAS +if (NOT DEFINED NO_CBLAS) + foreach (float_type ${FLOAT_TYPES}) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + #cblas_dotc_sub cblas_dotu_sub + GenerateNamedObjects("zdot.c" "FORCE_USE_STACK" "dotu_sub" 1 "" "" false ${float_type}) + GenerateNamedObjects("zdot.c" "FORCE_USE_STACK;CONJ" "dotc_sub" 1 "" "" false ${float_type}) + endif() + endforeach () +endif() + +if (NOT DEFINED NO_LAPACK) + set(LAPACK_SOURCES + lapack/gesv.c + ) + + # prepend z for complex versions + set(LAPACK_MANGLED_SOURCES + lapack/getrf.c lapack/getrs.c lapack/potrf.c lapack/getf2.c + lapack/potf2.c lapack/laswp.c lapack/lauu2.c + lapack/lauum.c lapack/trti2.c lapack/trtri.c + ) + + GenerateNamedObjects("${LAPACK_SOURCES}") + GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3) +endif () + +add_library(interface OBJECT ${OPENBLAS_SRC}) diff --git a/interface/gemm.c b/interface/gemm.c index a5a2b4724..7253b0500 100644 --- a/interface/gemm.c +++ b/interface/gemm.c @@ -121,6 +121,9 @@ void NAME(char *TRANSA, char *TRANSB, FLOAT *sa, *sb; #ifdef SMP + int nthreads_max; + 
int nthreads_avail; + double MNK; #ifndef COMPLEX #ifdef XDOUBLE int mode = BLAS_XDOUBLE | BLAS_REAL; @@ -237,6 +240,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS XFLOAT *sa, *sb; #ifdef SMP + int nthreads_max; + int nthreads_avail; + double MNK; #ifndef COMPLEX #ifdef XDOUBLE int mode = BLAS_XDOUBLE | BLAS_REAL; @@ -400,15 +406,15 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS mode |= (transa << BLAS_TRANSA_SHIFT); mode |= (transb << BLAS_TRANSB_SHIFT); - int nthreads_max = num_cpu_avail(3); - int nthreads_avail = nthreads_max; + nthreads_max = num_cpu_avail(3); + nthreads_avail = nthreads_max; #ifndef COMPLEX - double MNK = (double) args.m * (double) args.n * (double) args.k; + MNK = (double) args.m * (double) args.n * (double) args.k; if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) nthreads_max = 1; #else - double MNK = (double) args.m * (double) args.n * (double) args.k; + MNK = (double) args.m * (double) args.n * (double) args.k; if ( MNK <= (8192.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) nthreads_max = 1; #endif diff --git a/interface/gemv.c b/interface/gemv.c index d298d79f6..0a222a645 100644 --- a/interface/gemv.c +++ b/interface/gemv.c @@ -81,6 +81,9 @@ void NAME(char *TRANS, blasint *M, blasint *N, FLOAT *buffer; #ifdef SMP int nthreads; + int nthreads_max; + int nthreads_avail; + double MNK; #endif int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { @@ -135,6 +138,9 @@ void CNAME(enum CBLAS_ORDER order, blasint info, t; #ifdef SMP int nthreads; + int nthreads_max; + int nthreads_avail; + double MNK; #endif int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { @@ -235,10 +241,10 @@ void CNAME(enum CBLAS_ORDER order, #ifdef SMP - int nthreads_max = num_cpu_avail(2); - int nthreads_avail = nthreads_max; + nthreads_max = 
num_cpu_avail(2); + nthreads_avail = nthreads_max; - double MNK = (double) m * (double) n; + MNK = (double) m * (double) n; if ( MNK <= (24.0 * 24.0 * (double) (GEMM_MULTITHREAD_THRESHOLD*GEMM_MULTITHREAD_THRESHOLD) ) ) nthreads_max = 1; diff --git a/interface/imax.c b/interface/imax.c index 55ffa7c6e..4378f1e22 100644 --- a/interface/imax.c +++ b/interface/imax.c @@ -136,6 +136,8 @@ blasint NAME(blasint *N, FLOAT *x, blasint *INCX){ ret = (blasint)MAX_K(n, x, incx); + if(ret > n) ret=n; + FUNCTION_PROFILE_END(COMPSIZE, n, 0); IDEBUG_END; @@ -159,6 +161,8 @@ CBLAS_INDEX CNAME(blasint n, FLOAT *x, blasint incx){ ret = MAX_K(n, x, incx); + if (ret > n) ret=n; + if (ret) ret --; FUNCTION_PROFILE_END(COMPSIZE, n, 0); diff --git a/interface/rotg.c b/interface/rotg.c index 49088ab02..a0e6efdab 100644 --- a/interface/rotg.c +++ b/interface/rotg.c @@ -14,8 +14,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ #endif - -#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) long double da = *DA; long double db = *DB; diff --git a/interface/zaxpby.c b/interface/zaxpby.c index 9e8324432..1abb24de9 100644 --- a/interface/zaxpby.c +++ b/interface/zaxpby.c @@ -53,13 +53,13 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT * #endif - if (n <= 0) return; - FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_i = *(ALPHA + 1); FLOAT beta_r = *(BETA + 0); FLOAT beta_i = *(BETA + 1); + if (n <= 0) return; + FUNCTION_PROFILE_START(); if (incx < 0) x -= (n - 1) * incx * 2; diff --git a/interface/zdot.c b/interface/zdot.c index 1380ce292..d4d0fab92 100644 --- a/interface/zdot.c +++ b/interface/zdot.c @@ -57,21 +57,25 @@ #ifdef RETURN_BY_STRUCT MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { #elif defined RETURN_BY_STACK -void NAME(FLOAT _Complex *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint 
*INCY) { +void NAME(OPENBLAS_COMPLEX_FLOAT *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { #else -FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { +OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { #endif BLASLONG n = *N; BLASLONG incx = *INCX; BLASLONG incy = *INCY; #ifndef RETURN_BY_STACK - FLOAT _Complex ret; + OPENBLAS_COMPLEX_FLOAT ret; #endif #ifdef RETURN_BY_STRUCT MYTYPE myret; #endif +#ifndef RETURN_BY_STRUCT + OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); +#endif + PRINT_DEBUG_NAME; if (n <= 0) { @@ -80,10 +84,10 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, myret.i = 0.; return myret; #elif defined RETURN_BY_STACK - *result = ZERO; + *result = zero; return; #else - return ZERO; + return zero; #endif } @@ -144,21 +148,24 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, #else #ifdef FORCE_USE_STACK -void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT _Complex *result){ +void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){ #else -FLOAT _Complex CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ +OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ - FLOAT _Complex ret; + OPENBLAS_COMPLEX_FLOAT ret; + OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); #endif PRINT_DEBUG_CNAME; if (n <= 0) { #ifdef FORCE_USE_STACK - *result = ZERO; + //*result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); + CREAL(*result) = 0.0; + CIMAG(*result) = 0.0; return; #else - return ZERO; + return zero; #endif } diff --git a/interface/zgemv.c b/interface/zgemv.c index 704034aaf..520136b45 100644 --- a/interface/zgemv.c +++ b/interface/zgemv.c @@ -79,6 +79,9 @@ void NAME(char *TRANS, blasint *M, blasint *N, FLOAT *buffer; #ifdef SMP int nthreads; + int nthreads_max; + int nthreads_avail; + double MNK; 
#endif int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, @@ -91,14 +94,14 @@ void NAME(char *TRANS, blasint *M, blasint *N, blasint lenx, leny; blasint i; - PRINT_DEBUG_NAME; - FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_i = *(ALPHA + 1); FLOAT beta_r = *(BETA + 0); FLOAT beta_i = *(BETA + 1); + PRINT_DEBUG_NAME; + TOUPPER(trans); info = 0; @@ -145,6 +148,9 @@ void CNAME(enum CBLAS_ORDER order, blasint info, t; #ifdef SMP int nthreads; + int nthreads_max; + int nthreads_avail; + double MNK; #endif int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, @@ -153,14 +159,14 @@ void CNAME(enum CBLAS_ORDER order, GEMV_O, GEMV_U, GEMV_S, GEMV_D, }; - PRINT_DEBUG_CNAME; - FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_i = *(ALPHA + 1); FLOAT beta_r = *(BETA + 0); FLOAT beta_i = *(BETA + 1); + PRINT_DEBUG_CNAME; + trans = -1; info = 0; @@ -234,10 +240,10 @@ void CNAME(enum CBLAS_ORDER order, #ifdef SMP - int nthreads_max = num_cpu_avail(2); - int nthreads_avail = nthreads_max; + nthreads_max = num_cpu_avail(2); + nthreads_avail = nthreads_max; - double MNK = (double) m * (double) n; + MNK = (double) m * (double) n; if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) )) nthreads_max = 1; diff --git a/interface/zrotg.c b/interface/zrotg.c index e9e8a11df..187343d41 100644 --- a/interface/zrotg.c +++ b/interface/zrotg.c @@ -6,13 +6,7 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ - PRINT_DEBUG_NAME; - - IDEBUG_START; - - FUNCTION_PROFILE_START(); - -#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) long double da_r = *(DA + 0); long double da_i = *(DA + 1); @@ -22,6 +16,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ long double ada = fabs(da_r) + fabs(da_i); + PRINT_DEBUG_NAME; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + if (ada == ZERO) { *C = ZERO; 
*(S + 0) = ONE; @@ -54,6 +54,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ FLOAT ada = fabs(da_r) + fabs(da_i); FLOAT adb; + PRINT_DEBUG_NAME; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + if (ada == ZERO) { *C = ZERO; *(S + 0) = ONE; diff --git a/interface/zsyr.c b/interface/zsyr.c index 5fe29cefa..09b1de578 100644 --- a/interface/zsyr.c +++ b/interface/zsyr.c @@ -121,6 +121,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO FLOAT *buffer; int trans, uplo; blasint info; + FLOAT * ALPHA = &alpha; + FLOAT alpha_r = ALPHA[0]; + FLOAT alpha_i = ALPHA[1]; #ifdef SMP int nthreads; #endif diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt new file mode 100644 index 000000000..8a3b021cc --- /dev/null +++ b/kernel/CMakeLists.txt @@ -0,0 +1,428 @@ + +include_directories(${CMAKE_SOURCE_DIR}) +include("${CMAKE_SOURCE_DIR}/cmake/kernel.cmake") + +# Makefile + +if (DEFINED TARGET_CORE) + #override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) + set(BUILD_KERNEL 1) + set(KDIR "") + set(TSUFFIX "_${TARGET_CORE}") +else () + set(TARGET_CORE ${CORE}) + set(KDIR "") + set(TSUFFIX "") +endif () + +SetDefaultL1() +SetDefaultL2() +SetDefaultL3() +ParseMakefileVars("${KERNELDIR}/KERNEL") +ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") + +if (${ARCH} STREQUAL "x86") +if (NOT MSVC) + GenerateNamedObjects("${KERNELDIR}/cpuid.S" "" "" false "" "" true) +else() + GenerateNamedObjects("${KERNELDIR}/cpuid_win.c" "" "" false "" "" true) +endif() +endif () + +# don't use float type name mangling here +GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "F_INTERFACE" "lsame" false "" "" true) +GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "COMPLEX;F_INTERFACE" "scabs1" false "" "" true) +GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE;COMPLEX;F_INTERFACE" "dcabs1" false "" "" true) + +# Makefile.L1 +foreach (float_type ${FLOAT_TYPES}) + # a bit of metaprogramming here to pull out the appropriate 
KERNEL var + string(SUBSTRING ${float_type} 0 1 float_char) + GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type}) + if (DEFINED ${float_char}MAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type}) + endif () + if (DEFINED ${float_char}MINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "" "min_k" false "" "" false ${float_type}) + endif () + GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type}) + if (DEFINED I${float_char}MAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type}) + endif () + if (DEFINED I${float_char}MINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "" "i*min_k" false "" "" false ${float_type}) + endif () + GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}NRM2KERNEL}" "" "nrm2_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "rot_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}SCALKERNEL}" "" "scal_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}SWAPKERNEL}" "" "swap_k" false "" "" false ${float_type}) + 
GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPBYKERNEL}" "" "axpby_k" false "" "" false ${float_type}) + + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dotu_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "CONJ" "dotc_k" false "" "" false ${float_type}) + else () + GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) + endif () + + if (${float_type} STREQUAL "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "srot_k" false "" "" false ${float_type}) + endif() + if (${float_type} STREQUAL "ZCOMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "drot_k" false "" "" false ${float_type}) + endif() + +endforeach () + +#dsdot,sdsdot +GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE") +GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE") + +# Makefile.L2 +GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) +GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) +foreach (float_type ${FLOAT_TYPES}) + string(SUBSTRING ${float_type} 0 1 float_char) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ" "gerc_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "XCONJ" "gerv_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ;XCONJ" "gerd_k" false "" "" false ${float_type}) + + 
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type}) + + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_U_KERNEL}" "HEMV" "hemv_U" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_L_KERNEL}" "HEMV;LOWER" "hemv_L" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_V_KERNEL}" "HEMV;HEMVREV" "hemv_V" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_M_KERNEL}" "HEMV;HEMVREV;LOWER" "hemv_M" false "" "" false ${float_type}) + + else () + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) + endif () +endforeach () + +# Makefile.L3 +set(USE_TRMM false) + +if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell") + set(USE_TRMM 
true) +endif () + +foreach (float_type ${FLOAT_TYPES}) + string(SUBSTRING ${float_type} 0 1 float_char) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) + + if (${float_char}GEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type}) + endif () + + if (${float_char}GEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMITCOPY}" "${float_type}" "${${float_char}GEMMITCOPYOBJ}" false "" "" true ${float_type}) + endif () + + if (${float_char}GEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMONCOPY}" "${float_type}" "${${float_char}GEMMONCOPYOBJ}" false "" "" true ${float_type}) + endif () + + if (${float_char}GEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMOTCOPY}" "${float_type}" "${${float_char}GEMMOTCOPYOBJ}" false "" "" true ${float_type}) + endif () + + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_BETA}" "" "gemm_beta" false "" "" false ${float_type}) + + if (USE_TRMM) + set(TRMM_KERNEL "${${float_char}TRMMKERNEL}") + else () + set(TRMM_KERNEL "${${float_char}GEMMKERNEL}") + endif () + + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + + # just enumerate all these. there is an extra define for these indicating which side is a conjugate (e.g. 
CN NC NN) that I don't really want to work into GenerateCombinationObjects + + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false ${float_type}) + + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;NN" "trmm_kernel_LN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;NN" "trmm_kernel_LT" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;CONJ;CN" "trmm_kernel_LR" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;CONJ;CN" "trmm_kernel_LC" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;NN" "trmm_kernel_RN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;NN" "trmm_kernel_RT" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;CONJ;NC" "trmm_kernel_RR" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;CONJ;NC" "trmm_kernel_RC" false "" "" false ${float_type}) + + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false 
${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type}) + + + #hemm + GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "hemm_iutcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "hemm_iltcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type}) + + # symm for c and z + GenerateNamedObjects("generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_M}.c" "" "symm_iutcopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "symm_iltcopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type}) + 
GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iutncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type}) + 
GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type}) + + else () #For real + GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) + + # symm for s and d + 
GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_M}.c" "" "symm_iutcopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "symm_iltcopy" false "" "" false ${float_type}) + + # These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define. + # Could simplify it a bit by pairing up by -UUNIT/-DUNIT. + + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy" false "" "" 
false ${float_type}) + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iutncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type}) + 
GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type}) + + endif () + + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false ${float_type}) + + + + if (NOT DEFINED ${float_char}OMATCOPY_CN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CN ../arm/zomatcopy_cn.c) + else () + 
set(${float_char}OMATCOPY_CN ../arm/omatcopy_cn.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_RN ../arm/zomatcopy_rn.c) + else () + set(${float_char}OMATCOPY_RN ../arm/omatcopy_rn.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_CT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CT ../arm/zomatcopy_ct.c) + else () + set(${float_char}OMATCOPY_CT ../arm/omatcopy_ct.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_RT ../arm/zomatcopy_rt.c) + else () + set(${float_char}OMATCOPY_RT ../arm/omatcopy_rt.c) + endif () + endif () + + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type}) + + if (NOT DEFINED ${float_char}OMATCOPY_CNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CNC ../arm/zomatcopy_cnc.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_RNC ../arm/zomatcopy_rnc.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_CTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CTC ../arm/zomatcopy_ctc.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_RTC 
../arm/zomatcopy_rtc.c) + endif () + endif () + + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CNC}" "CONJ" "omatcopy_k_cnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RNC}" "CONJ;ROWM" "omatcopy_k_rnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CTC}" "CONJ" "omatcopy_k_ctc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type}) + endif() + + #imatcopy + if (NOT DEFINED ${float_char}IMATCOPY_CN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_CN ../generic/zimatcopy_cn.c) + else () + set(${float_char}IMATCOPY_CN ../generic/imatcopy_cn.c) + endif () + endif () + + if (NOT DEFINED ${float_char}IMATCOPY_RN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_RN ../generic/zimatcopy_rn.c) + else () + set(${float_char}IMATCOPY_RN ../generic/imatcopy_rn.c) + endif () + endif () + + if (NOT DEFINED ${float_char}IMATCOPY_CT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_CT ../generic/zimatcopy_ct.c) + else () + set(${float_char}IMATCOPY_CT ../generic/imatcopy_ct.c) + endif () + endif () + + if (NOT DEFINED ${float_char}IMATCOPY_RT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_RT ../generic/zimatcopy_rt.c) + else () + set(${float_char}IMATCOPY_RT ../generic/imatcopy_rt.c) + endif () + endif () + + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CN}" "" "imatcopy_k_cn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RN}" "ROWM" "imatcopy_k_rn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CT}" "" "imatcopy_k_ct" 
false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RT}" "ROWM" "imatcopy_k_rt" false "" "" false ${float_type}) + + + if (NOT DEFINED ${float_char}IMATCOPY_CNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_CNC ../generic/zimatcopy_cnc.c) + endif () + endif () + if (NOT DEFINED ${float_char}IMATCOPY_RNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_RNC ../generic/zimatcopy_rnc.c) + endif () + endif () + if (NOT DEFINED ${float_char}IMATCOPY_CTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_CTC ../generic/zimatcopy_ctc.c) + endif () + endif () + if (NOT DEFINED ${float_char}IMATCOPY_RTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_RTC ../generic/zimatcopy_rtc.c) + endif () + endif () + + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CNC}" "CONJ" "imatcopy_k_cnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RNC}" "CONJ;ROWM" "imatcopy_k_rnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CTC}" "CONJ" "imatcopy_k_ctc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RTC}" "CONJ;ROWM" "imatcopy_k_rtc" false "" "" false ${float_type}) + endif() + + #geadd + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) +endforeach () + +# Makefile.LA +#DBLASOBJS += dneg_tcopy$(TSUFFIX).$(SUFFIX) dlaswp_ncopy$(TSUFFIX).$(SUFFIX) + +add_library(kernel OBJECT ${OPENBLAS_SRC}) diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 7da4bcb92..63e675b8d 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -3640,7 +3640,7 @@ ifndef DGEADD_K DGEADD_K = ../generic/geadd.c endif 
-$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) +$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K) $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ ifndef CGEADD_K diff --git a/kernel/arm/zaxpby.c b/kernel/arm/zaxpby.c index 2e0c2940d..d9948349d 100644 --- a/kernel/arm/zaxpby.c +++ b/kernel/arm/zaxpby.c @@ -38,13 +38,16 @@ int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FL BLASLONG ix,iy; FLOAT temp; + BLASLONG inc_x2; + BLASLONG inc_y2; + if ( n < 0 ) return(0); ix = 0; iy = 0; - BLASLONG inc_x2 = 2 * inc_x; - BLASLONG inc_y2 = 2 * inc_y; + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; if ( beta_r == 0.0 && beta_i == 0.0) { diff --git a/kernel/arm/zaxpy.c b/kernel/arm/zaxpy.c index 929ee8b54..1dcaeac27 100644 --- a/kernel/arm/zaxpy.c +++ b/kernel/arm/zaxpy.c @@ -41,6 +41,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, { BLASLONG i=0; BLASLONG ix,iy; + BLASLONG inc_x2; + BLASLONG inc_y2; if ( n < 0 ) return(0); if ( da_r == 0.0 && da_i == 0.0 ) return(0); @@ -48,8 +50,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, ix = 0; iy = 0; - BLASLONG inc_x2 = 2 * inc_x; - BLASLONG inc_y2 = 2 * inc_y; + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; while(i < n) { diff --git a/kernel/arm/zcopy.c b/kernel/arm/zcopy.c index f720d6ee5..07fe584c5 100644 --- a/kernel/arm/zcopy.c +++ b/kernel/arm/zcopy.c @@ -40,11 +40,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) { BLASLONG i=0; BLASLONG ix=0,iy=0; + BLASLONG inc_x2; + BLASLONG inc_y2; if ( n < 0 ) return(0); - BLASLONG inc_x2 = 2 * inc_x; - BLASLONG inc_y2 = 2 * inc_y; + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; while(i < n) { diff --git a/kernel/arm/zdot.c b/kernel/arm/zdot.c index 469487531..57f47e58e 100644 --- a/kernel/arm/zdot.c +++ b/kernel/arm/zdot.c @@ -35,25 +35,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
**************************************************************************************/ #include "common.h" -#include <complex.h> +#ifndef _MSC_VER +#include <complex.h> FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#else +OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#endif { BLASLONG i=0; BLASLONG ix=0,iy=0; FLOAT dot[2]; - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; + BLASLONG inc_x2; + BLASLONG inc_y2; dot[0]=0.0; dot[1]=0.0; - __real__ result = 0.0 ; - __imag__ result = 0.0 ; + CREAL(result) = 0.0 ; + CIMAG(result) = 0.0 ; if ( n < 1 ) return(result); - BLASLONG inc_x2 = 2 * inc_x ; - BLASLONG inc_y2 = 2 * inc_y ; + inc_x2 = 2 * inc_x ; + inc_y2 = 2 * inc_y ; while(i < n) { @@ -69,8 +75,8 @@ FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG in i++ ; } - __real__ result = dot[0]; - __imag__ result = dot[1]; + CREAL(result) = dot[0]; + CIMAG(result) = dot[1]; return(result); } diff --git a/kernel/arm/zrot.c b/kernel/arm/zrot.c index 356a4df72..98be68db8 100644 --- a/kernel/arm/zrot.c +++ b/kernel/arm/zrot.c @@ -41,11 +41,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT BLASLONG i=0; BLASLONG ix=0,iy=0; FLOAT temp[2]; + BLASLONG inc_x2; + BLASLONG inc_y2; if ( n <= 0 ) return(0); - BLASLONG inc_x2 = 2 * inc_x ; - BLASLONG inc_y2 = 2 * inc_y ; + inc_x2 = 2 * inc_x ; + inc_y2 = 2 * inc_y ; while(i < n) { diff --git a/kernel/arm/zswap.c b/kernel/arm/zswap.c index fcfb38506..ae4760ae0 100644 --- a/kernel/arm/zswap.c +++ b/kernel/arm/zswap.c @@ -42,11 +42,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm BLASLONG i=0; BLASLONG ix=0,iy=0; FLOAT temp[2]; + BLASLONG inc_x2; + BLASLONG inc_y2; if ( n < 0 ) return(0); - BLASLONG inc_x2 = 2 * inc_x; - BLASLONG inc_y2 = 2 * inc_y; + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; while(i < n) { diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c
index 0eeac2e1f..a4d1486fc 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -550,6 +550,13 @@ gotoblas_t TABLE_NAME = { zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS, zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS, + simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS, + dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS, + cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS, + cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS, + zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS, + zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS, + sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS }; diff --git a/kernel/x86/cpuid_win.c b/kernel/x86/cpuid_win.c new file mode 100644 index 000000000..a1b00016b --- /dev/null +++ b/kernel/x86/cpuid_win.c @@ -0,0 +1,41 @@ +/*************************************************************************** +Copyright (c) 2015, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. 
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#if defined(_MSC_VER) && !defined(__clang__) + +#include <intrin.h> + +void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) +{ + int cpuInfo[4] = {-1}; + __cpuid(cpuInfo, op); + *eax = cpuInfo[0]; + *ebx = cpuInfo[1]; + *ecx = cpuInfo[2]; + *edx = cpuInfo[3]; +} +#endif diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index 9db66818f..2dcc8658b 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -119,11 +119,11 @@ XCOPYKERNEL = zcopy.S endif ifndef SDOTKERNEL -SDOTKERNEL = ../generic/dot.c +SDOTKERNEL = ../generic/dot.c endif ifndef DSDOTKERNEL -DSDOTKERNEL = ../generic/dot.c +DSDOTKERNEL = ../generic/dot.c endif ifndef DDOTKERNEL diff --git a/kernel/x86_64/KERNEL.generic b/kernel/x86_64/KERNEL.generic index 672edb069..a23e59f3f 100644 --- a/kernel/x86_64/KERNEL.generic +++ b/kernel/x86_64/KERNEL.generic @@ -155,5 +155,11 @@ XSYMV_L_KERNEL = ../generic/zsymv_k.c ZHEMV_U_KERNEL = ../generic/zhemv_k.c ZHEMV_L_KERNEL = ../generic/zhemv_k.c +LSAME_KERNEL = ../generic/lsame.c +SCABS_KERNEL = ../generic/cabs.c +DCABS_KERNEL = ../generic/cabs.c +QCABS_KERNEL = ../generic/cabs.c + +#Dump kernel CGEMM3MKERNEL =
../generic/zgemm3mkernel_dump.c ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c diff --git a/kernel/x86_64/dtrmm_kernel_4x8_haswell.c b/kernel/x86_64/dtrmm_kernel_4x8_haswell.c index 504c784ac..ac8c97d03 100644 --- a/kernel/x86_64/dtrmm_kernel_4x8_haswell.c +++ b/kernel/x86_64/dtrmm_kernel_4x8_haswell.c @@ -7,7 +7,7 @@ static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOA static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOAT *C0, FLOAT *C1, FLOAT *C2,FLOAT *C3, FLOAT *C4, FLOAT *C5,FLOAT *C6, FLOAT *C7) { - BLASLONG I = 0; + BLASLONG i = 0; BLASLONG temp1 = n * 8; __asm__ __volatile__ @@ -110,7 +110,7 @@ static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOA : : - "a" (I), // 0 + "a" (i), // 0 "r" (temp1), // 1 "S" (a), // 2 "D" (b), // 3 diff --git a/lapack-netlib/lapacke/src/lapacke_cunmlq_work.c b/lapack-netlib/lapacke/src/lapacke_cunmlq_work.c index 1cd20e1ca..5cf66424d 100644 --- a/lapack-netlib/lapacke/src/lapacke_cunmlq_work.c +++ b/lapack-netlib/lapacke/src/lapacke_cunmlq_work.c @@ -1,5 +1,5 @@ /***************************************************************************** - Copyright (c) 2011, Intel Corp. + Copyright (c) 2014, Intel Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #include "lapacke_utils.h" -lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, +lapack_int LAPACKE_cunmlq_work( int matrix_layout, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, @@ -41,20 +41,22 @@ lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, lapack_complex_float* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_order == LAPACK_COL_MAJOR ) { + lapack_int r; + if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_cunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_order == LAPACK_ROW_MAJOR ) { + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + r = LAPACKE_lsame( side, 'l' ) ? m : n; lapack_int lda_t = MAX(1,k); lapack_int ldc_t = MAX(1,m); lapack_complex_float* a_t = NULL; lapack_complex_float* c_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < r ) { info = -8; LAPACKE_xerbla( "LAPACKE_cunmlq_work", info ); return info; @@ -84,8 +86,8 @@ lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, goto exit_level_1; } /* Transpose input matrices */ - LAPACKE_cge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); - LAPACKE_cge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); + LAPACKE_cge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); + LAPACKE_cge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ LAPACK_cunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, work, &lwork, &info ); diff --git a/lapack-netlib/lapacke/src/lapacke_dormlq_work.c b/lapack-netlib/lapacke/src/lapacke_dormlq_work.c index 9a7a997fe..99a7c3c71 100644 --- a/lapack-netlib/lapacke/src/lapacke_dormlq_work.c +++ 
b/lapack-netlib/lapacke/src/lapacke_dormlq_work.c @@ -1,5 +1,5 @@ /***************************************************************************** - Copyright (c) 2011, Intel Corp. + Copyright (c) 2014, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,27 +33,29 @@ #include "lapacke_utils.h" -lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans, +lapack_int LAPACKE_dormlq_work( int matrix_layout, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ) { lapack_int info = 0; + lapack_int r; lapack_int lda_t, ldc_t; double *a_t = NULL, *c_t = NULL; - if( matrix_order == LAPACK_COL_MAJOR ) { + if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_dormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_order == LAPACK_ROW_MAJOR ) { + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + r = LAPACKE_lsame( side, 'l' ) ? 
m : n; lda_t = MAX(1,k); ldc_t = MAX(1,m); /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < r ) { info = -8; LAPACKE_xerbla( "LAPACKE_dormlq_work", info ); return info; @@ -81,8 +83,8 @@ lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans, goto exit_level_1; } /* Transpose input matrices */ - LAPACKE_dge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); - LAPACKE_dge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); + LAPACKE_dge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); + LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ LAPACK_dormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, work, &lwork, &info ); diff --git a/lapack-netlib/lapacke/src/lapacke_sormlq_work.c b/lapack-netlib/lapacke/src/lapacke_sormlq_work.c index 7a7464d18..bbf55bd84 100644 --- a/lapack-netlib/lapacke/src/lapacke_sormlq_work.c +++ b/lapack-netlib/lapacke/src/lapacke_sormlq_work.c @@ -1,5 +1,5 @@ /***************************************************************************** - Copyright (c) 2011, Intel Corp. + Copyright (c) 2014, Intel Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -33,27 +33,29 @@ #include "lapacke_utils.h" -lapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans, +lapack_int LAPACKE_sormlq_work( int matrix_layout, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ) { lapack_int info = 0; + lapack_int r; lapack_int lda_t, ldc_t; float *a_t = NULL, *c_t = NULL; - if( matrix_order == LAPACK_COL_MAJOR ) { + if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_sormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_order == LAPACK_ROW_MAJOR ) { + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + r = LAPACKE_lsame( side, 'l' ) ? m : n; lda_t = MAX(1,k); ldc_t = MAX(1,m); /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < r ) { info = -8; LAPACKE_xerbla( "LAPACKE_sormlq_work", info ); return info; @@ -81,8 +83,8 @@ lapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans, goto exit_level_1; } /* Transpose input matrices */ - LAPACKE_sge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); - LAPACKE_sge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); + LAPACKE_sge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); + LAPACKE_sge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ LAPACK_sormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, work, &lwork, &info ); diff --git a/lapack-netlib/lapacke/src/lapacke_zunmlq_work.c b/lapack-netlib/lapacke/src/lapacke_zunmlq_work.c index 8677ac0bc..38a2d947a 100644 --- a/lapack-netlib/lapacke/src/lapacke_zunmlq_work.c +++ b/lapack-netlib/lapacke/src/lapacke_zunmlq_work.c @@ -1,5 +1,5 @@ /***************************************************************************** - Copyright (c) 2011, Intel Corp. 
+ Copyright (c) 2014, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #include "lapacke_utils.h" -lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, +lapack_int LAPACKE_zunmlq_work( int matrix_layout, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, @@ -41,20 +41,22 @@ lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, lapack_complex_double* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_order == LAPACK_COL_MAJOR ) { + lapack_int r; + if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_zunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_order == LAPACK_ROW_MAJOR ) { + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + r = LAPACKE_lsame( side, 'l' ) ? 
m : n; lapack_int lda_t = MAX(1,k); lapack_int ldc_t = MAX(1,m); lapack_complex_double* a_t = NULL; lapack_complex_double* c_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < r ) { info = -8; LAPACKE_xerbla( "LAPACKE_zunmlq_work", info ); return info; @@ -84,8 +86,8 @@ lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, goto exit_level_1; } /* Transpose input matrices */ - LAPACKE_zge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); - LAPACKE_zge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); + LAPACKE_zge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); + LAPACKE_zge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ LAPACK_zunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, work, &lwork, &info ); diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt new file mode 100644 index 000000000..de42e1ab6 --- /dev/null +++ b/lapack/CMakeLists.txt @@ -0,0 +1,98 @@ + +include_directories(${CMAKE_SOURCE_DIR}) + + +set(LAPACK_SOURCES + getrf/getrf_single.c + potrf/potrf_U_single.c + potrf/potrf_L_single.c + lauum/lauum_U_single.c + lauum/lauum_L_single.c +) + +# add a 'z' to filename for complex version +set(LAPACK_MANGLED_SOURCES + getf2/getf2_k.c + lauu2/lauu2_U.c + lauu2/lauu2_L.c + potf2/potf2_U.c + potf2/potf2_L.c +) + +# sources that need TRANS set +# this has a 'z' version +set(TRANS_SOURCES + getrs/getrs_single.c +) + +# sources that need UNIT set +# these do NOT have a z version +set(UNIT_SOURCES + trtri/trtri_U_single.c + trtri/trtri_L_single.c +) + +# these have a 'z' version +set(UNIT_SOURCES2 + trti2/trti2_U.c + trti2/trti2_L.c +) + +GenerateNamedObjects("${LAPACK_SOURCES}") +GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" false "" "" false 3) + +# TODO: laswp needs arch specific code +GenerateNamedObjects("laswp/generic/laswp_k.c" "" "laswp_plus" false "" "" false 3) +GenerateNamedObjects("laswp/generic/laswp_k.c" "MINUS" "laswp_minus" false "" "" 
false 3) + +if (SMP) + + if (USE_OPENMP) + set(GETRF_SRC getrf/getrf_parallel_omp.c) + else () + set(GETRF_SRC getrf/getrf_parallel.c) + endif () + + # these do not have 'z' versions + set(PARALLEL_SOURCES + ${GETRF_SRC} + lauum/lauum_U_parallel.c + lauum/lauum_L_parallel.c + potrf/potrf_U_parallel.c + potrf/potrf_L_parallel.c + ) + + # this has a z version + list(APPEND TRANS_SOURCES + getrs/getrs_parallel.c + ) + + # these do NOT have a z version + list(APPEND UNIT_SOURCES + trtri/trtri_U_parallel.c + trtri/trtri_L_parallel.c + ) + + GenerateNamedObjects("${PARALLEL_SOURCES}") +endif () + +foreach (float_type ${FLOAT_TYPES}) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + foreach (trans_src ${TRANS_SOURCES}) + string(REGEX MATCH "[a-z]/([a-z]+_)([a-z]+)" op_name ${trans_src}) + string(REPLACE "/" "/z" ztrans_src ${trans_src}) + GenerateNamedObjects("${ztrans_src}" "TRANS=1" "${CMAKE_MATCH_1}N_${CMAKE_MATCH_2}" false "" "" false ${float_type}) + GenerateNamedObjects("${ztrans_src}" "TRANS=2" "${CMAKE_MATCH_1}T_${CMAKE_MATCH_2}" false "" "" false ${float_type}) + GenerateNamedObjects("${ztrans_src}" "TRANS=3" "${CMAKE_MATCH_1}R_${CMAKE_MATCH_2}" false "" "" false ${float_type}) + GenerateNamedObjects("${ztrans_src}" "TRANS=4" "${CMAKE_MATCH_1}C_${CMAKE_MATCH_2}" false "" "" false ${float_type}) + endforeach () + else () + GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4 "" false ${float_type}) + endif () +endforeach () + +GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "" 4) +GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0 "" "" 3) + +add_library(lapack OBJECT ${OPENBLAS_SRC}) + diff --git a/lapack/getrf/getrf_parallel.c b/lapack/getrf/getrf_parallel.c index a76be3ba7..8fdf76987 100644 --- a/lapack/getrf/getrf_parallel.c +++ b/lapack/getrf/getrf_parallel.c @@ -67,7 +67,7 @@ double sqrt(double); #undef GETRF_FACTOR #define GETRF_FACTOR 1.00 -static inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, 
BLASLONG IS, BLASLONG BK, BLASLONG T) { +static __inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) { double m = (double)(M - IS - BK); double n = (double)(N - IS - BK); @@ -373,7 +373,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, BLASLONG num_cpu; +#ifdef _MSC_VER + BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE]; +#else volatile BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128))); +#endif #ifndef COMPLEX #ifdef XDOUBLE diff --git a/openblas_config_template.h b/openblas_config_template.h index 3b3435b0e..942a8f547 100644 --- a/openblas_config_template.h +++ b/openblas_config_template.h @@ -59,7 +59,8 @@ typedef int blasint; extension since version 3.0. If neither are available, use a compatible structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ - (__GNUC__ >= 3 && !defined(__cplusplus))) + (__GNUC__ >= 3 && !defined(__cplusplus)) || \ + _MSC_VER >= 1800) // Visual Studio 2013 supports complex #define OPENBLAS_COMPLEX_C99 #ifndef __cplusplus #include <complex.h> diff --git a/symcopy.h b/symcopy.h index 48ccbd369..16172c046 100644 --- a/symcopy.h +++ b/symcopy.h @@ -43,7 +43,7 @@ #if !defined(XDOUBLE) || !defined(QUAD_PRECISION) -static inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -141,7 +141,7 @@ static inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -232,7 +232,7 @@ static inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } -static inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static
__inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -362,7 +362,7 @@ static inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -486,7 +486,7 @@ static inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -613,7 +613,7 @@ static inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -735,7 +735,7 @@ static inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } -static inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -862,7 +862,7 @@ static inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -984,7 +984,7 @@ static inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } -static inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1082,7 +1082,7 @@ static inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void 
TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1180,7 +1180,7 @@ static inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1270,7 +1270,7 @@ static inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1360,7 +1360,7 @@ static inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1490,7 +1490,7 @@ static inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1620,7 +1620,7 @@ static inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1744,7 +1744,7 @@ static inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZTRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZTRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; diff --git a/test/CMakeLists.txt 
b/test/CMakeLists.txt new file mode 100644 index 000000000..cd4497117 --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,41 @@ +# Build the Fortran BLAS testers listed below and register them with CTest. +include_directories(${CMAKE_SOURCE_DIR}) + +enable_language(Fortran) + +set(OpenBLAS_Tests + sblat1 sblat2 sblat3 + dblat1 dblat2 dblat3 + cblat1 cblat2 cblat3 + zblat1 zblat2 zblat3) + +foreach(test_bin ${OpenBLAS_Tests}) +add_executable(${test_bin} ${test_bin}.f) +target_link_libraries(${test_bin} ${OpenBLAS_LIBNAME}_static) +endforeach() + +# Helper script arguments: $1 test executable, $2 input data file, $3 output result file. +# The run fails when the result file was not produced or when it contains FATAL. +file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh +"rm -f $3\n" +"$1 < $2\n" +"test -f $3 || exit 1\n" +"if grep -q FATAL $3; then\n" +"echo Error\n" +"exit 1\n" +"else\n" +"exit 0\n" +"fi\n" +) + +set(float_types s d c z) +foreach(float_type ${float_types}) +string(TOUPPER ${float_type} float_type_upper) +# Refer to the testers by target so multi-config generators (e.g. Visual Studio) resolve the real binary path. +add_test(NAME "${float_type}blas1" + COMMAND ${float_type}blat1) +add_test(NAME "${float_type}blas2" + COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "$<TARGET_FILE:${float_type}blat2>" "${PROJECT_SOURCE_DIR}/test/${float_type}blat2.dat" ${float_type_upper}BLAT2.SUMM) +add_test(NAME "${float_type}blas3" + COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "$<TARGET_FILE:${float_type}blat3>" "${PROJECT_SOURCE_DIR}/test/${float_type}blat3.dat" ${float_type_upper}BLAT3.SUMM) +endforeach()