From e5c47e44f690ef661af893608f47d56b1d3a1cb4 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 30 Dec 2014 21:53:00 -0600 Subject: [PATCH 001/137] First pass at converting a few makefiles to CMake. --- .gitignore | 1 + CMakeLists.txt | 18 ++++++ cmake/c_check.cmake | 29 ++++++++++ cmake/f_check.cmake | 37 ++++++++++++ cmake/prebuild.cmake | 55 ++++++++++++++++++ cmake/system.cmake | 130 +++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 270 insertions(+) create mode 100644 CMakeLists.txt create mode 100644 cmake/c_check.cmake create mode 100644 cmake/f_check.cmake create mode 100644 cmake/prebuild.cmake create mode 100644 cmake/system.cmake diff --git a/.gitignore b/.gitignore index 7422cead3..bae3d057f 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,4 @@ test/sblat3 test/zblat1 test/zblat2 test/zblat3 +build diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..2dbb6f059 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,18 @@ +## +## Author: Hank Anderson +## Copyright: (c) Stat-Ease, Inc. +## Created: 12/23/14 +## Last Modified: 12/23/14 +## + +cmake_minimum_required(VERSION 2.8.4) +project(OpenBLAS) + +# is this necessary? lapack-netlib has its own fortran checks in its CMakeLists.txt +#enable_language(Fortran) + +message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with.") + +include("${CMAKE_SOURCE_DIR}/cmake/system.cmake") + + diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake new file mode 100644 index 000000000..20c0aa72f --- /dev/null +++ b/cmake/c_check.cmake @@ -0,0 +1,29 @@ +## +## Author: Hank Anderson +## Copyright: (c) Stat-Ease, Inc. +## Created: 12/29/14 +## Last Modified: 12/29/14 +## Description: Ported from the OpenBLAS/c_check perl script. +## This is triggered by prebuild.cmake and runs before any of the code is built. +## Creates config.h and Makefile.conf. + +# N.B. c_check is not cross-platform, so instead try to use CMake variables. 
Alternatively, could use try_compile to get some of this info the same way c_check does. + +# run c_check (creates the TARGET files) +# message(STATUS "Running c_check...") +# execute_process(COMMAND perl c_check ${TARGET_MAKE} ${TARGET_CONF} ${CMAKE_CXX_COMPILER} +# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +# TODO: is ${BINARY} sufficient for the __32BIT__ define? +# TODO: CMAKE_SYSTEM_PROCESSOR is not set by CMake, need to set it manually when doing a cross-compile +# TODO: CMAKE_CXX_COMPILER_ID and CMAKE_SYSTEM_NAME are probably not the same strings as OpenBLAS is expecting +# TODO: detect this +set(NEED_FU 1) + +file(WRITE ${TARGET_CONF} + "#define OS_${CMAKE_SYSTEM_NAME}\t1\n" + "#define ARCH_${CMAKE_SYSTEM_PROCESSOR}\t1\n" + "#define C_${CMAKE_CXX_COMPILER_ID}\t1\n" + "#define __${BINARY}BIT__\t1\n" + "#define FUNDERSCORE\t${NEED_FU}\n") + diff --git a/cmake/f_check.cmake b/cmake/f_check.cmake new file mode 100644 index 000000000..a291430aa --- /dev/null +++ b/cmake/f_check.cmake @@ -0,0 +1,37 @@ +## +## Author: Hank Anderson +## Copyright: (c) Stat-Ease, Inc. +## Created: 12/29/14 +## Last Modified: 12/29/14 +## Description: Ported from the OpenBLAS/f_check perl script. +## This is triggered by prebuild.cmake and runs before any of the code is built. +## Appends Fortran information to config.h and Makefile.conf. + + +if (NOT ${ONLY_CBLAS}) + # N.B. f_check is not cross-platform, so instead try to use CMake variables + # run f_check (appends to TARGET files) +# message(STATUS "Running f_check...") +# execute_process(COMMAND perl f_check ${TARGET_MAKE} ${TARGET_CONF} ${CMAKE_Fortran_COMPILER} +# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + + # TODO: is BU makefile macro needed? + # TODO: detect whether underscore needed, set #defines appropriately - use try_compile + # TODO: set FEXTRALIB flags a la f_check? 
+ + set(BU "_") + file(APPEND ${TARGET_CONF} + "#define BUNDERSCORE _\n" + "#define NEEDBUNDERSCORE 1\n" + "#define NEED2UNDERSCORES 0\n") + +else () + + #When we only build CBLAS, we set NOFORTRAN=2 + set(NOFORTRAN 2) + set(NO_FBLAS 1) + set(BU "_") + file(APPEND ${TARGET_CONF} + "#define BUNDERSCORE _\n" + "#define NEEDBUNDERSCORE 1\n") +endif() diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake new file mode 100644 index 000000000..34a683a4f --- /dev/null +++ b/cmake/prebuild.cmake @@ -0,0 +1,55 @@ +## +## Author: Hank Anderson +## Copyright: (c) Stat-Ease, Inc. +## Created: 12/29/14 +## Last Modified: 12/29/14 +## Description: Ported from OpenBLAS/Makefile.prebuild +## This is triggered by system.cmake and runs before any of the code is built. +## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files). +## Next it runs f_check and appends some fortran information to the files. +## Finally it runs getarch and getarch_2nd for even more environment information. + +# CPUIDEMU = ../../cpuid/table.o + +if (DEFINED CPUIDEMU) + set(EXFLAGS "-DCPUIDEMU -DVENDOR=99") +endif () + +if (DEFINED TARGET_CORE) + # set the C flags for just this file + set_source_files_properties(getarch_2nd.c PROPERTIES COMPILE_FLAGS "-DBUILD_KERNEL") + set(TARGET_MAKE "Makefile_kernel.conf") + set(TARGET_CONF "config_kernel.h") +else() + set(TARGET_MAKE "Makefile.conf") + set(TARGET_CONF "config.h") +endif () + +include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake") +include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") + +# compile getarch +# TODO: need to use execute_process here, or compilation won't happen until later - maybe make temporary CMakeLists.txt file using file() ? 
+#add_executable(getarch getarch.c cpuid.S ${CPUIDEMU} +# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) +# +## run getarch, which appends even more to the TARGET files +#message(STATUS "Running getarch") +#execute_process(COMMAND getarch 0 >> ${TARGET_MAKE} +# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) +#execute_process(COMMAND getarch 1 >> ${TARGET_CONF} +# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) +# +## config.h is ready for getarch_2nd now, so compile that +#set(GETARCH2_SOURCES getarch_2nd.c config.h) +#add_executable(getarch_2nd getarch_2nd.c config.h) +# +## finally run getarch_2nd, appending yet more to the TARGET files +#message(STATUS "Running getarch_2nd") +#execute_process(COMMAND getarch_2nd 0 >> ${TARGET_MAKE} +# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) +#execute_process(COMMAND getarch_2nd 1 >> ${TARGET_CONF} +# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) + +# TODO: need to read in the vars from Makefile.conf/Makefile_kernel.conf + diff --git a/cmake/system.cmake b/cmake/system.cmake new file mode 100644 index 000000000..e5c66f3ed --- /dev/null +++ b/cmake/system.cmake @@ -0,0 +1,130 @@ +## +## Author: Hank Anderson +## Copyright: (c) Stat-Ease, Inc. +## Created: 12/29/14 +## Last Modified: 12/29/14 +## Description: Ported from OpenBLAS/Makefile.system +## + +set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib") + +# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa +# http://stackoverflow.com/questions/714100/os-detecting-makefile + +# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa + +# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1. 
+if (DEFINED TARGET_CORE) + set(TARGET ${TARGET_CORE}) +endif () + +# Force fallbacks for 32bit +if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) + message(STATUS "Compiling a ${BINARY}-bit binary.") + set(NO_AVX 1) + if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE") + set(TARGET "NEHALEM") + endif () + if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER") + set(TARGET "BARCELONA") + endif () +endif () + +if (DEFINED TARGET) + message(STATUS "Targetting the ${TARGET} architecture.") + set(GETARCH_FLAGS "-DFORCE_${TARGET}") +endif () + +if (${INTERFACE64}) + message(STATUS "Using 64-bit integers.") + set(GETARCH_FLAGS "${GETARCH_FLAGS} -DUSE64BITINT") +endif () + +if (NOT DEFINED GEMM_MULTITHREAD_THRESHOLD) + set(GEMM_MULTITHREAD_THRESHOLD 4) +endif () +message(STATUS "GEMM multithread threshold set to ${GEMM_MULTITHREAD_THRESHOLD}.") +set(GETARCH_FLAGS "${GETARCH_FLAGS} -DGEMM_MULTITHREAD_THRESHOLD=${GEMM_MULTITHREAD_THRESHOLD}") + +if (${NO_AVX}) + message(STATUS "Disabling Advanced Vector Extensions (AVX).") + set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX") +endif () + +if (${NO_AVX2}) + message(STATUS "Disabling Advanced Vector Extensions 2 (AVX2).") + set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2") +endif () + +if (CMAKE_BUILD_TYPE STREQUAL Debug) + set(GETARCH_FLAGS "${GETARCH_FLAGS} -g") +endif () + +# TODO: let CMake handle this? -hpa +#if (${QUIET_MAKE}) +# set(MAKE "${MAKE} -s") +#endif() + +if (NOT DEFINED NO_PARALLEL_MAKE) + set(NO_PARALLEL_MAKE 0) +endif () +set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}") + +if (CMAKE_CXX_COMPILER STREQUAL loongcc) + set(GETARCH_FLAGS "${GETARCH_FLAGS} -static") +endif () + +#if don't use Fortran, it will only compile CBLAS. +if (${ONLY_CBLAS}) + set(NO_LAPACK 1) +else () + set(ONLY_CBLAS 0) +endif () + +include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake") + +if (NOT DEFINED NUM_THREADS) + # TODO: NUM_CORES comes from `getarch.c` or `cpuid_x86.c`. 
This is built and executed above in `Makefile.prebuild`, and the results are in `Makefile.conf` and `Makefile_kernel.conf`. -hpa + set(NUM_THREADS ${NUM_CORES}) +endif () + +if (NUM_THREADS EQUALS 1) + # TODO: was "override USE_THREAD = 0", do we need override here? -hpa + set(USE_THREAD 0) +endif () + +if (DEFINED USE_THREAD) + if (NOT ${USE_THREAD}) + unset(SMP) + else () + set(SMP 1) + endif () +else () + # N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa + if (${NUM_THREADS} EQUALS 1) + unset(SMP) + else () + set(SMP 1) + endif () +endif () + +if (${SMP}) + message("SMP enabled.") +endif () + +if (NOT DEFINED NEED_PIC) + set(NEED_PIC 1) +endif () + +# TODO: I think CMake should be handling all this stuff -hpa +unset(ARFLAGS) +set(CPP "${COMPILER} -E") +set(AR "${CROSS_SUFFIX}ar") +set(AS "$(CROSS_SUFFIX)as") +set(LD "$(CROSS_SUFFIX)ld") +set(RANLIB "$(CROSS_SUFFIX)ranlib") +set(NM "$(CROSS_SUFFIX)nm") +set(DLLWRAP "$(CROSS_SUFFIX)dllwrap") +set(OBJCOPY "$(CROSS_SUFFIX)objcopy") +set(OBJCONV "$(CROSS_SUFFIX)objconv") + From 1a41022e3ef23d3d3d52e9884763e263a55fbe02 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 1 Jan 2015 21:01:28 -0600 Subject: [PATCH 002/137] Added MSVC defines to cpuid.h and getarch.c. 
--- cpuid.h | 6 +++++- getarch.c | 14 +++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/cpuid.h b/cpuid.h index ab6a3fb32..406b7fa25 100644 --- a/cpuid.h +++ b/cpuid.h @@ -39,6 +39,10 @@ #ifndef CPUID_H #define CPUID_H +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) +#define INTEL_AMD +#endif + #define VENDOR_INTEL 1 #define VENDOR_UMC 2 #define VENDOR_AMD 3 @@ -59,7 +63,7 @@ #define FAMILY_PM 7 #define FAMILY_IA64 8 -#if defined(__i386__) || defined(__x86_64__) +#ifdef INTEL_AMD #define GET_EXFAMILY 1 #define GET_EXMODEL 2 #define GET_TYPE 3 diff --git a/getarch.c b/getarch.c index f6a5ecb94..8a6b4dcd1 100644 --- a/getarch.c +++ b/getarch.c @@ -69,10 +69,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* or implied, of The University of Texas at Austin. */ /*********************************************************************/ -#if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) +#if defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64) #define OS_WINDOWS #endif +#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) +#define INTEL_AMD +#endif + #include #include #ifdef OS_WINDOWS @@ -783,7 +787,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define OPENBLAS_SUPPORTED #endif -#if defined(__i386__) || (__x86_64__) +#ifdef INTEL_AMD #include "cpuid_x86.c" #define OPENBLAS_SUPPORTED #endif @@ -878,7 +882,7 @@ int main(int argc, char *argv[]){ #ifdef FORCE printf("CORE=%s\n", CORENAME); #else -#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) +#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) printf("CORE=%s\n", get_corename()); #endif #endif @@ -898,7 +902,7 @@ int main(int argc, char *argv[]){ #endif -#if defined(__i386__) || defined(__x86_64__) +#ifdef INTEL_AMD #ifndef FORCE get_sse(); #else @@ -978,7 +982,7 @@ int main(int argc, char *argv[]){ #ifdef FORCE printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); #else -#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) +#if defined(INTEL_AMD) || defined(POWER) || defined(__mips__) || defined(__arm__) || defined(__aarch64__) printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); #endif #endif From 92cdac5f876b781186a530abba6fdc49c310802c Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 1 Jan 2015 21:02:48 -0600 Subject: [PATCH 003/137] Added MSVC functions to cpuid_x86.c to replace gcc-specific ASM. 
--- cpuid_x86.c | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/cpuid_x86.c b/cpuid_x86.c index ef90b26d8..6b7e408d8 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -40,6 +40,12 @@ #include #include "cpuid.h" +#ifdef _MSC_VER +#define C_INLINE __inline +#else +#define C_INLINE inline +#endif + /* #ifdef NO_AVX #define CPUTYPE_HASWELL CPUTYPE_NEHALEM @@ -53,12 +59,26 @@ #endif */ +#ifdef _MSC_VER + +void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) +{ + int cpuInfo[4] = {-1}; + __cpuid(cpuInfo, op); + *eax = cpuInfo[0]; + *ebx = cpuInfo[1]; + *ecx = cpuInfo[2]; + *edx = cpuInfo[3]; +} + +#else + #ifndef CPUIDEMU #if defined(__APPLE__) && defined(__i386__) void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); #else -static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ +static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ #if defined(__i386__) && defined(__PIC__) __asm__ __volatile__ ("mov %%ebx, %%edi;" @@ -115,14 +135,16 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int * #endif -static inline int have_cpuid(void){ +#endif // _MSC_VER + +static C_INLINE int have_cpuid(void){ int eax, ebx, ecx, edx; cpuid(0, &eax, &ebx, &ecx, &edx); return eax; } -static inline int have_excpuid(void){ +static C_INLINE int have_excpuid(void){ int eax, ebx, ecx, edx; cpuid(0x80000000, &eax, &ebx, &ecx, &edx); @@ -130,10 +152,14 @@ static inline int have_excpuid(void){ } #ifndef NO_AVX -static inline void xgetbv(int op, int * eax, int * edx){ +static C_INLINE void xgetbv(int op, int * eax, int * edx){ //Use binary code for xgetbv +#ifdef _MSC_VER + *eax = __xgetbv(op); +#else __asm__ __volatile__ (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc"); +#endif } #endif From 0f6bec0a32cea93deff9eb9261827a7825f56a90 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 1 Jan 2015 21:03:17 -0600 Subject: [PATCH 004/137] 
cmake.prebuild now compiles getarch. Doesn't actually run it yet. --- cmake/prebuild.cmake | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 34a683a4f..200d03692 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -29,7 +29,20 @@ include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake") include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") # compile getarch -# TODO: need to use execute_process here, or compilation won't happen until later - maybe make temporary CMakeLists.txt file using file() ? +enable_language(ASM) +set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") +file(MAKE_DIRECTORY ${GETARCH_DIR}) +try_compile(GETARCH_RESULT ${GETARCH_DIR} + SOURCES ${CMAKE_SOURCE_DIR}/getarch.c ${CMAKE_SOURCE_DIR}/cpuid.S ${CPUIDEMO} + COMPILE_DEFINITIONS ${EXFLAGS} -I${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GETARCH_LOG + ) + +message(STATUS "GETARCH RESULT: ${GETARCH_RESULT}") +message(STATUS "GETARCH LOG: ${GETARCH_LOG}") + +# TODO: need to append output of getarch binary to TARGET_CONF, not sure if I can get at it after using try_compile - may need to create CMakeLists.txt on the fly and build/execute + #add_executable(getarch getarch.c cpuid.S ${CPUIDEMU} # WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) # From f4d1e7a2650c985722259fbcd594559397095743 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 27 Jan 2015 11:37:39 -0600 Subject: [PATCH 005/137] Hardcoded NUM_CORES to get system.cmake working. --- cmake/prebuild.cmake | 40 ++++++++++++++++++++++++++++++++++++++++ cmake/system.cmake | 6 +++--- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 200d03692..ffebbe30f 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -9,6 +9,43 @@ ## Next it runs f_check and appends some fortran information to the files. ## Finally it runs getarch and getarch_2nd for even more environment information. 
+# List of vars set by this file and included files: +# OSNAME +# ARCH +# C_COMPILER +# BINARY32 +# BINARY64 +# CEXTRALIB +# F_COMPILER +# FC +# BU +# CORE +# LIBCORE +# NUM_CORES <- REQUIRED +# HAVE_MMX +# HAVE_SSE +# HAVE_SSE2 +# HAVE_SSE3 +# MAKE +# SGEMM_UNROLL_M +# SGEMM_UNROLL_N +# DGEMM_UNROLL_M +# DGEMM_UNROLL_M +# QGEMM_UNROLL_N +# QGEMM_UNROLL_N +# CGEMM_UNROLL_M +# CGEMM_UNROLL_M +# ZGEMM_UNROLL_N +# ZGEMM_UNROLL_N +# XGEMM_UNROLL_M +# XGEMM_UNROLL_N +# CGEMM3M_UNROLL_M +# CGEMM3M_UNROLL_N +# ZGEMM3M_UNROLL_M +# ZGEMM3M_UNROLL_M +# XGEMM3M_UNROLL_N +# XGEMM3M_UNROLL_N + # CPUIDEMU = ../../cpuid/table.o if (DEFINED CPUIDEMU) @@ -66,3 +103,6 @@ message(STATUS "GETARCH LOG: ${GETARCH_LOG}") # TODO: need to read in the vars from Makefile.conf/Makefile_kernel.conf +# temporarily hardcoded to get system.cmake working +set(NUM_CORES 4) + diff --git a/cmake/system.cmake b/cmake/system.cmake index e5c66f3ed..dc5aec2f2 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -88,8 +88,8 @@ if (NOT DEFINED NUM_THREADS) set(NUM_THREADS ${NUM_CORES}) endif () -if (NUM_THREADS EQUALS 1) - # TODO: was "override USE_THREAD = 0", do we need override here? -hpa +if (${NUM_THREADS} EQUAL 1) + # TODO: was "override USE_THREAD = 0", do we need "override" here? -hpa set(USE_THREAD 0) endif () @@ -101,7 +101,7 @@ if (DEFINED USE_THREAD) endif () else () # N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa - if (${NUM_THREADS} EQUALS 1) + if (${NUM_THREADS} EQUAL 1) unset(SMP) else () set(SMP 1) From d2d15e522f04344be4a1d1cae24f6ef96dedbc00 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 27 Jan 2015 12:23:35 -0600 Subject: [PATCH 006/137] Started converting lib target to CMake. The main part of this target is looping through the BLAS subfolders and calling make on them. Need to add CMakeLists.txt for each of these subfolders. 
--- CMakeLists.txt | 40 ++++++++++++++++++++++++++++++++++++++++ cmake/prebuild.cmake | 3 ++- 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2dbb6f059..6bca1899e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,4 +15,44 @@ message(WARNING "CMake support is experimental. This will not produce the same M include("${CMAKE_SOURCE_DIR}/cmake/system.cmake") +set(BLASDIRS interface driver/level2 driver/level3 driver/others) + +if (NOT ${DYNAMIC_ARCH}) + list(APPEND BLASDIRS kernel) +endif () + +if (DEFINED UTEST_CHECK) + set(SANITY_CHECK 1) +endif () + +if (DEFINED SANITY_CHECK) + list(APPEND BLASDIRS reference) +endif () + +set(SUBDIRS ${BLASDIRS}) +if (NOT ${NO_LAPACK}) + list(APPEND SUBDIRS lapack) +endif () + +set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench) + +# all :: libs netlib tests shared + +# libs: +if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") + message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.") +endif () + +# Let CMake handle this +#if (${NOFORTRAN}) +# message(ERROR "OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. 
gfortran, ifort, openf90.") +#endif () + +if (${NO_STATIC} AND ${NO_SHARED}) + message(FATAL_ERROR "Neither static nor shared are enabled.") +endif () + +foreach (BLAS_DIR ${BLASDIRS}) + add_subdirectory(${BLAS_DIR}) +endforeach () diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index ffebbe30f..ded9f2ce0 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -19,7 +19,7 @@ # F_COMPILER # FC # BU -# CORE +# CORE <- REQUIRED # LIBCORE # NUM_CORES <- REQUIRED # HAVE_MMX @@ -105,4 +105,5 @@ message(STATUS "GETARCH LOG: ${GETARCH_LOG}") # temporarily hardcoded to get system.cmake working set(NUM_CORES 4) +set(CORE "GENERIC") From 864b8b31de8dad0ba45b333fe1c04e4ed667a7a5 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 27 Jan 2015 13:54:29 -0600 Subject: [PATCH 007/137] Fixed incorrect case in OS_ definition in c_check. --- cmake/c_check.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake index 20c0aa72f..5669c723a 100644 --- a/cmake/c_check.cmake +++ b/cmake/c_check.cmake @@ -17,11 +17,12 @@ # TODO: is ${BINARY} sufficient for the __32BIT__ define? # TODO: CMAKE_SYSTEM_PROCESSOR is not set by CMake, need to set it manually when doing a cross-compile # TODO: CMAKE_CXX_COMPILER_ID and CMAKE_SYSTEM_NAME are probably not the same strings as OpenBLAS is expecting -# TODO: detect this +# TODO: detect NEED_FU set(NEED_FU 1) +string(TOUPPER ${CMAKE_SYSTEM_NAME} CMAKE_SYSTEM_NAME_UC) file(WRITE ${TARGET_CONF} - "#define OS_${CMAKE_SYSTEM_NAME}\t1\n" + "#define OS_${CMAKE_SYSTEM_NAME_UC}\t1\n" "#define ARCH_${CMAKE_SYSTEM_PROCESSOR}\t1\n" "#define C_${CMAKE_CXX_COMPILER_ID}\t1\n" "#define __${BINARY}BIT__\t1\n" From 1e8bb0e0e02474d70708a9b555e3ca0227b5422c Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 27 Jan 2015 14:03:46 -0600 Subject: [PATCH 008/137] Fixed architecture detection when AMD64 in c_check. 
--- cmake/c_check.cmake | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake index 5669c723a..07ed8a178 100644 --- a/cmake/c_check.cmake +++ b/cmake/c_check.cmake @@ -20,10 +20,16 @@ # TODO: detect NEED_FU set(NEED_FU 1) -string(TOUPPER ${CMAKE_SYSTEM_NAME} CMAKE_SYSTEM_NAME_UC) +# Convert CMake vars into the format that OpenBLAS expects +string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS) +set(HOST_ARCH ${CMAKE_SYSTEM_PROCESSOR}) +if (${HOST_ARCH} STREQUAL "AMD64") + set(HOST_ARCH "X86_64") +endif () + file(WRITE ${TARGET_CONF} - "#define OS_${CMAKE_SYSTEM_NAME_UC}\t1\n" - "#define ARCH_${CMAKE_SYSTEM_PROCESSOR}\t1\n" + "#define OS_${HOST_OS}\t1\n" + "#define ARCH_${HOST_ARCH}\t1\n" "#define C_${CMAKE_CXX_COMPILER_ID}\t1\n" "#define __${BINARY}BIT__\t1\n" "#define FUNDERSCORE\t${NEED_FU}\n") From 5eefe18ae4c1c018201aaa36df2f1c889b190b0e Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 27 Jan 2015 16:17:17 -0600 Subject: [PATCH 009/137] Added CMakeLists.txt for the first of the BLAS folders. It only does the double precision compile currently. I realized I didn't finish converting Makefile.system yet, so I made a note of that. 
--- .gitignore | 1 + CMakeLists.txt | 22 +++++++++++++++++++ cmake/system.cmake | 2 ++ interface/CMakeLists.txt | 46 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+) create mode 100644 interface/CMakeLists.txt diff --git a/.gitignore b/.gitignore index bae3d057f..3e163abef 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,4 @@ test/zblat1 test/zblat2 test/zblat3 build +build.* diff --git a/CMakeLists.txt b/CMakeLists.txt index 6bca1899e..25b88d565 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -56,3 +56,25 @@ foreach (BLAS_DIR ${BLASDIRS}) add_subdirectory(${BLAS_DIR}) endforeach () +#Save the config files for installation +# @cp Makefile.conf Makefile.conf_last +# @cp config.h config_last.h +#ifdef QUAD_PRECISION +# @echo "#define QUAD_PRECISION">> config_last.h +#endif +#ifeq ($(EXPRECISION), 1) +# @echo "#define EXPRECISION">> config_last.h +#endif +### +#ifeq ($(DYNAMIC_ARCH), 1) +# @$(MAKE) -C kernel commonlibs || exit 1 +# @for d in $(DYNAMIC_CORE) ; \ +# do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ +# done +# @echo DYNAMIC_ARCH=1 >> Makefile.conf_last +#endif +#ifdef USE_THREAD +# @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last +#endif +# @touch lib.grd + diff --git a/cmake/system.cmake b/cmake/system.cmake index dc5aec2f2..11f0c5cdd 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -128,3 +128,5 @@ set(DLLWRAP "$(CROSS_SUFFIX)dllwrap") set(OBJCOPY "$(CROSS_SUFFIX)objcopy") set(OBJCONV "$(CROSS_SUFFIX)objconv") +# TODO: convert rest of Makefile.system, left off at "OS dependent settings" + diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt new file mode 100644 index 000000000..62a889f49 --- /dev/null +++ b/interface/CMakeLists.txt @@ -0,0 +1,46 @@ + +include_directories(${CMAKE_SOURCE_DIR}) + +# TODO: Need to generate object files for S, D, C, Q and X - start with D for now. 
+# The sources are the same, but there are additional preprocessor definitions depending on the precision (see Makefile.tail). + +add_library(DBLAS1OBJS OBJECT + axpy.c swap.c + copy.c scal.c + dot.c + asum.c nrm2.c + max.c # amax/min/amin compiled later from same source + rot.c rotg.c rotm.c rotmg.c + axpby.c +) + +# N.B. The original Makefile passed in -UUSE_MIN and -UUSE_ABS (where appropriate), no way to do that at a source-level in cmake. REMOVE_DEFINITIONS removes a definition for the rest of the compilation. +add_library(AMAX_OBJ OBJECT max.c) +set_target_properties(AMAX_OBJ PROPERTIES COMPILE_DEFINITIONS USE_ABS) +add_library(AMIN_OBJ OBJECT max.c) +set_target_properties(AMIN_OBJ PROPERTIES COMPILE_DEFINITIONS USE_ABS) +set_target_properties(AMIN_OBJ PROPERTIES COMPILE_DEFINITIONS USE_MIN) +add_library(MIN_OBJ OBJECT max.c) +set_target_properties(MIN_OBJ PROPERTIES COMPILE_DEFINITIONS USE_MIN) + +# TODO: USE_NETLIB_GEMV shoudl switch gemv.c to netlib/*gemv.f +add_library(DBLAS2OBJS OBJECT + gemv.c ger.c + trsv.c trmv.c symv.c + syr.c syr2.c gbmv.c + sbmv.c spmv.c + spr.c spr2.c + tbsv.c tbmv.c + tpsv.c tpmv.c +) + +add_library(DBLAS3OBJS OBJECT + gemm.c symm.c + trsm.c syrk.c syr2k.c + omatcopy.c imatcopy.c +) + +# trmm is trsm with a compiler flag set +add_library(TRMM_OBJ OBJECT trsm.c) +set_target_properties(TRMM_OBJ PROPERTIES COMPILE_DEFINITIONS TRMM) + From 9a508abdc7f810df8c09e94845ef17338724fab0 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 28 Jan 2015 14:52:15 -0600 Subject: [PATCH 010/137] Added first pass at driver/level2 makefile conversion. 
--- driver/level2/CMakeLists.txt | 86 ++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 driver/level2/CMakeLists.txt diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt new file mode 100644 index 000000000..8dc37a880 --- /dev/null +++ b/driver/level2/CMakeLists.txt @@ -0,0 +1,86 @@ + +# sources that need to be compiled twice, once with no flags and once with LOWER +set(UL_SOURCES + sbmv_k.c + spmv_k.c + spr_k.c + spr2_k.c + syr_k.c + syr2_k.c +) + +# sources that need to be compiled several times, for UNIT, TRANS +set(NU_SOURCES + tbmv_U.c + tbsv_U.c + tpmv_U.c + tpsv_U.c + trmv_U.c + trsv_U.c + tbmv_L.c + tbsv_L.c + tpmv_L.c + tpsv_L.c + trmv_L.c + trsv_L.c +) + +# first compile all the objects that don't need specific preprocessor defines +add_library(DBLAS_NONE OBJECT + gbmv_k.c # gbmv_N + ${UL_SOURCES} + ${NU_SOURCES} +) + +# then do objects with transpose/triangular/etc definitions + +# objects that need TRANS set +add_library(DBLAS_T OBJECT gbmv_k.c ${NU_SOURCES}) +set_target_properties(DBLAS_T PROPERTIES COMPILE_DEFINITIONS TRANS) + +# objects that need LOWER set +add_library(DBLAS_L OBJECT ${UL_SOURCES}) +set_target_properties(DBLAS_L PROPERTIES COMPILE_DEFINITIONS LOWER) + +# objects that need UNIT set +add_library(DBLAS_U OBJECT ${NU_SOURCES}) +set_target_properties(DBLAS_U PROPERTIES COMPILE_DEFINITIONS UNIT) + +# objects that need TRANS and UNIT set +add_library(DBLAS_TU OBJECT ${NU_SOURCES}) +set_target_properties(DBLAS_TU PROPERTIES COMPILE_DEFINITIONS UNIT) +set_target_properties(DBLAS_TU PROPERTIES COMPILE_DEFINITIONS TRANS) + +#if (DEFINED SMP) +# add_library(DBLASOBJS_SMP +# dgemv_thread_n.c dgemv_thread_t.c +# dger_thread.c +# dsymv_thread_U.c dsymv_thread_L.c +# dsyr_thread_U.c dsyr_thread_L.c +# dsyr2_thread_U.c dsyr2_thread_L.c +# dspr_thread_U.c dspr_thread_L.c +# dspr2_thread_U.c dspr2_thread_L.c +# dtrmv_thread_NUU.c dtrmv_thread_NUN.c +# dtrmv_thread_NLU.c dtrmv_thread_NLN.c +# 
dtrmv_thread_TUU.c dtrmv_thread_TUN.c +# dtrmv_thread_TLU.c dtrmv_thread_TLN.c +# dspmv_thread_U.c dspmv_thread_L.c +# dtpmv_thread_NUU.c dtpmv_thread_NUN.c +# dtpmv_thread_NLU.c dtpmv_thread_NLN.c +# dtpmv_thread_TUU.c dtpmv_thread_TUN.c +# dtpmv_thread_TLU.c dtpmv_thread_TLN.c +# dgbmv_thread_n.c dgbmv_thread_t.c +# dsbmv_thread_U.c dsbmv_thread_L.c +# dtbmv_thread_NUU.c dtbmv_thread_NUN.c +# dtbmv_thread_NLU.c dtbmv_thread_NLN.c +# dtbmv_thread_TUU.c dtbmv_thread_TUN.c +# dtbmv_thread_TLU.c dtbmv_thread_TLN.c +# ) +#endif () + +set(DBLAS_TARGETS DBLAS_NONE DBLAS_T DBLAS_L DBLAS_U DBLAS_TU) + +foreach (${TARGET} ${DBLAS_TARGETS}) + set_target_properties(${TARGET} PROPERTIES COMPILE_DEFINITIONS DOUBLE) +endforeach () + From c5f5c7a0769c852b569d734fe03e7559039820ea Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 28 Jan 2015 15:47:47 -0600 Subject: [PATCH 011/137] Updated c_check OS/compiler/bits detection. --- cmake/c_check.cmake | 37 +++++++++++++++++++++++++++---------- cmake/prebuild.cmake | 2 +- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake index 07ed8a178..d8facfedc 100644 --- a/cmake/c_check.cmake +++ b/cmake/c_check.cmake @@ -7,30 +7,47 @@ ## This is triggered by prebuild.cmake and runs before any of the code is built. ## Creates config.h and Makefile.conf. -# N.B. c_check is not cross-platform, so instead try to use CMake variables. Alternatively, could use try_compile to get some of this info the same way c_check does. +# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables. -# run c_check (creates the TARGET files) -# message(STATUS "Running c_check...") -# execute_process(COMMAND perl c_check ${TARGET_MAKE} ${TARGET_CONF} ${CMAKE_CXX_COMPILER} -# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) - -# TODO: is ${BINARY} sufficient for the __32BIT__ define? 
-# TODO: CMAKE_SYSTEM_PROCESSOR is not set by CMake, need to set it manually when doing a cross-compile -# TODO: CMAKE_CXX_COMPILER_ID and CMAKE_SYSTEM_NAME are probably not the same strings as OpenBLAS is expecting # TODO: detect NEED_FU set(NEED_FU 1) # Convert CMake vars into the format that OpenBLAS expects string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS) +if (${HOST_OS} STREQUAL "WINDOWS") + set(HOST_OS WINNT) +endif () + +# added by hpa - check size of void ptr to detect 64-bit compile +if (NOT DEFINED BINARY) + set(BINARY 32) + if (CMAKE_SIZEOF_VOID_P EQUAL 8) + set(BINARY 64) + endif () +endif () + +# CMake docs define these: +# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for. +# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on. set(HOST_ARCH ${CMAKE_SYSTEM_PROCESSOR}) if (${HOST_ARCH} STREQUAL "AMD64") set(HOST_ARCH "X86_64") endif () +# If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong +if (${HOST_ARCH} STREQUAL "X86_64" AND BINARY EQUAL 32) + set(HOST_ARCH X86) +endif () + +set(COMPILER_ID ${CMAKE_CXX_COMPILER_ID}) +if (${COMPILER_ID} STREQUAL "GNU") + set(COMPILER_ID "GCC") +endif () + file(WRITE ${TARGET_CONF} "#define OS_${HOST_OS}\t1\n" "#define ARCH_${HOST_ARCH}\t1\n" - "#define C_${CMAKE_CXX_COMPILER_ID}\t1\n" + "#define C_${COMPILER_ID}\t1\n" "#define __${BINARY}BIT__\t1\n" "#define FUNDERSCORE\t${NEED_FU}\n") diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index ded9f2ce0..a4faa131d 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -78,7 +78,7 @@ try_compile(GETARCH_RESULT ${GETARCH_DIR} message(STATUS "GETARCH RESULT: ${GETARCH_RESULT}") message(STATUS "GETARCH LOG: ${GETARCH_LOG}") -# TODO: need to append output of getarch binary to TARGET_CONF, not sure if I can get at it after using try_compile - may need to create CMakeLists.txt on the fly and build/execute +# TODO: need to append output of getarch binary to TARGET_CONF, use COPY_FILE param 
(look at try_compile docs) to copy the resulting binary somewhere then run it #add_executable(getarch getarch.c cpuid.S ${CPUIDEMU} # WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) From 1c5b6bb4f7fd843433b922ed1976d4a05f46a27c Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 28 Jan 2015 16:33:48 -0600 Subject: [PATCH 012/137] Added CORE define to config.h in prebuild.cmake (temporarily). --- cmake/prebuild.cmake | 4 ++++ driver/level2/CMakeLists.txt | 2 ++ 2 files changed, 6 insertions(+) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index a4faa131d..76f74e049 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -106,4 +106,8 @@ message(STATUS "GETARCH LOG: ${GETARCH_LOG}") # temporarily hardcoded to get system.cmake working set(NUM_CORES 4) set(CORE "GENERIC") +# TODO: this should be done by getarch! see above +file(APPEND ${TARGET_CONF} + "#define ${CORE}" +) diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index 8dc37a880..d06d03ccf 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -1,4 +1,6 @@ +include_directories(${CMAKE_SOURCE_DIR}) + # sources that need to be compiled twice, once with no flags and once with LOWER set(UL_SOURCES sbmv_k.c From 8ede4a8da49a26a89bc6d1f018a4e33103c43c31 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 28 Jan 2015 17:18:26 -0600 Subject: [PATCH 013/137] getarch now compiles and sets config.h defines properly. Still isn't parsed into CMake variables, and getarch_2 needs to get the same treatment. 
--- cmake/prebuild.cmake | 39 +++++++++++++++--------------------- cmake/system.cmake | 2 +- driver/level2/CMakeLists.txt | 4 ++-- 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 76f74e049..ba0e0789e 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -68,29 +68,27 @@ include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") # compile getarch enable_language(ASM) set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") +set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") file(MAKE_DIRECTORY ${GETARCH_DIR}) try_compile(GETARCH_RESULT ${GETARCH_DIR} SOURCES ${CMAKE_SOURCE_DIR}/getarch.c ${CMAKE_SOURCE_DIR}/cpuid.S ${CPUIDEMO} - COMPILE_DEFINITIONS ${EXFLAGS} -I${CMAKE_SOURCE_DIR} + COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE GETARCH_LOG - ) + COPY_FILE ${GETARCH_BIN} +) -message(STATUS "GETARCH RESULT: ${GETARCH_RESULT}") -message(STATUS "GETARCH LOG: ${GETARCH_LOG}") +message(STATUS "Running getarch") -# TODO: need to append output of getarch binary to TARGET_CONF, use COPY_FILE param (look at try_compile docs) to copy the resulting binary somewhere then run it +# use the cmake binary w/ the -E param to run a shell command in a cross-platform way +execute_process(COMMAND ${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT) +execute_process(COMMAND ${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT) -#add_executable(getarch getarch.c cpuid.S ${CPUIDEMU} -# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) -# -## run getarch, which appends even more to the TARGET files -#message(STATUS "Running getarch") -#execute_process(COMMAND getarch 0 >> ${TARGET_MAKE} -# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) -#execute_process(COMMAND getarch 1 >> ${TARGET_CONF} -# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) -# -## config.h is ready for getarch_2nd now, so compile that +message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}") + +# append config data from getarch even more to the 
TARGET file +file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT}) + +## TODO: config.h is ready for getarch_2nd now, so compile that #set(GETARCH2_SOURCES getarch_2nd.c config.h) #add_executable(getarch_2nd getarch_2nd.c config.h) # @@ -101,13 +99,8 @@ message(STATUS "GETARCH LOG: ${GETARCH_LOG}") #execute_process(COMMAND getarch_2nd 1 >> ${TARGET_CONF} # WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) -# TODO: need to read in the vars from Makefile.conf/Makefile_kernel.conf - -# temporarily hardcoded to get system.cmake working +# TODO: parse the MAKE variables from getarch/getarch2 (GETARCH_MAKE_OUT) into CMAKE vars +# for now I temporarily hardcoded to get system.cmake working set(NUM_CORES 4) set(CORE "GENERIC") -# TODO: this should be done by getarch! see above -file(APPEND ${TARGET_CONF} - "#define ${CORE}" -) diff --git a/cmake/system.cmake b/cmake/system.cmake index 11f0c5cdd..0753ed028 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -109,7 +109,7 @@ else () endif () if (${SMP}) - message("SMP enabled.") + message(STATUS "SMP enabled.") endif () if (NOT DEFINED NEED_PIC) diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index d06d03ccf..990337fe1 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -82,7 +82,7 @@ set_target_properties(DBLAS_TU PROPERTIES COMPILE_DEFINITIONS TRANS) set(DBLAS_TARGETS DBLAS_NONE DBLAS_T DBLAS_L DBLAS_U DBLAS_TU) -foreach (${TARGET} ${DBLAS_TARGETS}) - set_target_properties(${TARGET} PROPERTIES COMPILE_DEFINITIONS DOUBLE) +foreach (${DBLAS_TARGET} ${DBLAS_TARGETS}) + set_target_properties(${DBLAS_TARGET} PROPERTIES COMPILE_DEFINITIONS DOUBLE) endforeach () From 61f21b5d036a221779ee7f467ca0dd856aea7f8f Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 28 Jan 2015 22:20:15 -0600 Subject: [PATCH 014/137] getarch_2nd now appends its output to config.h/config_kernel.h --- cmake/prebuild.cmake | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 
deletions(-) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index ba0e0789e..fa6621cd1 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -54,7 +54,7 @@ endif () if (DEFINED TARGET_CORE) # set the C flags for just this file - set_source_files_properties(getarch_2nd.c PROPERTIES COMPILE_FLAGS "-DBUILD_KERNEL") + set(GETARCH2_FLAGS "-DBUILD_KERNEL") set(TARGET_MAKE "Makefile_kernel.conf") set(TARGET_CONF "config_kernel.h") else() @@ -85,19 +85,31 @@ execute_process(COMMAND ${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT) message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}") -# append config data from getarch even more to the TARGET file +# append config data from getarch to the TARGET file file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT}) ## TODO: config.h is ready for getarch_2nd now, so compile that -#set(GETARCH2_SOURCES getarch_2nd.c config.h) -#add_executable(getarch_2nd getarch_2nd.c config.h) -# -## finally run getarch_2nd, appending yet more to the TARGET files -#message(STATUS "Running getarch_2nd") -#execute_process(COMMAND getarch_2nd 0 >> ${TARGET_MAKE} -# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) -#execute_process(COMMAND getarch_2nd 1 >> ${TARGET_CONF} -# WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) +set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build") +set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}") +file(MAKE_DIRECTORY ${GETARCH2_DIR}) +try_compile(GETARCH2_RESULT ${GETARCH2_DIR} + SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c + COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE GETARCH2_LOG + COPY_FILE ${GETARCH2_BIN} +) + +message(STATUS "getarch2 result ${GETARCH2_RESULT}") +message(STATUS "getarch2 log ${GETARCH2_LOG}") +# use the cmake binary w/ the -E param to run a shell command in a cross-platform way +execute_process(COMMAND ${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT) +execute_process(COMMAND ${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT) + 
+message(STATUS "GETARCH_2 results:\n${GETARCH2_MAKE_OUT}") +message(STATUS "GETARCH_2 cresults:\n${GETARCH2_CONF_OUT}") + +# append config data from getarch_2nd to the TARGET file +file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT}) # TODO: parse the MAKE variables from getarch/getarch2 (GETARCH_MAKE_OUT) into CMAKE vars # for now I temporarily hardcoded to get system.cmake working From 8c23965da381ef878a1f5fd81506b63df3083037 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 28 Jan 2015 22:57:44 -0600 Subject: [PATCH 015/137] prebuild.cmake now reads the output from getarch into CMake vars. --- cmake/prebuild.cmake | 30 ++++++++++++++++++++---------- driver/level2/CMakeLists.txt | 2 +- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index fa6621cd1..99ff0430a 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -83,12 +83,21 @@ message(STATUS "Running getarch") execute_process(COMMAND ${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT) execute_process(COMMAND ${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT) -message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}") +#message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}") # append config data from getarch to the TARGET file file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT}) -## TODO: config.h is ready for getarch_2nd now, so compile that +# TODO: make this a function, the exact same code is used again with getarch2 +string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" GETARCH_RESULT_LIST "${GETARCH_MAKE_OUT}") +foreach (GETARCH_LINE ${GETARCH_RESULT_LIST}) + # split the line into var and value, then assign the value to a CMake var + string(REGEX MATCHALL "[0-9_a-zA-Z]+" SPLIT_VAR "${GETARCH_LINE}") + list(GET SPLIT_VAR 0 VAR_NAME) + list(GET SPLIT_VAR 1 VAR_VALUE) + set(${VAR_NAME} ${VAR_VALUE}) +endforeach () + set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build") set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}") file(MAKE_DIRECTORY 
${GETARCH2_DIR}) @@ -99,20 +108,21 @@ try_compile(GETARCH2_RESULT ${GETARCH2_DIR} COPY_FILE ${GETARCH2_BIN} ) -message(STATUS "getarch2 result ${GETARCH2_RESULT}") -message(STATUS "getarch2 log ${GETARCH2_LOG}") # use the cmake binary w/ the -E param to run a shell command in a cross-platform way execute_process(COMMAND ${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT) execute_process(COMMAND ${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT) -message(STATUS "GETARCH_2 results:\n${GETARCH2_MAKE_OUT}") -message(STATUS "GETARCH_2 cresults:\n${GETARCH2_CONF_OUT}") +#message(STATUS "GETARCH_2 results:\n${GETARCH2_MAKE_OUT}") # append config data from getarch_2nd to the TARGET file file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT}) -# TODO: parse the MAKE variables from getarch/getarch2 (GETARCH_MAKE_OUT) into CMAKE vars -# for now I temporarily hardcoded to get system.cmake working -set(NUM_CORES 4) -set(CORE "GENERIC") +string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" GETARCH_RESULT_LIST "${GETARCH2_MAKE_OUT}") +foreach (GETARCH_LINE ${GETARCH_RESULT_LIST}) + # split the line into var and value, then assign the value to a CMake var + string(REGEX MATCHALL "[0-9_a-zA-Z]+" SPLIT_VAR "${GETARCH_LINE}") + list(GET SPLIT_VAR 0 VAR_NAME) + list(GET SPLIT_VAR 1 VAR_VALUE) + set(${VAR_NAME} ${VAR_VALUE}) +endforeach () diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index 990337fe1..c2119bfe1 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -82,7 +82,7 @@ set_target_properties(DBLAS_TU PROPERTIES COMPILE_DEFINITIONS TRANS) set(DBLAS_TARGETS DBLAS_NONE DBLAS_T DBLAS_L DBLAS_U DBLAS_TU) -foreach (${DBLAS_TARGET} ${DBLAS_TARGETS}) +foreach (DBLAS_TARGET ${DBLAS_TARGETS}) set_target_properties(${DBLAS_TARGET} PROPERTIES COMPILE_DEFINITIONS DOUBLE) endforeach () From dabaecb2bc7d536607abe2d9930636934c826150 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 29 Jan 2015 09:30:47 -0600 Subject: [PATCH 016/137] Moved 
getarch parsing code into a function. --- cmake/prebuild.cmake | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 99ff0430a..ad1a83912 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -65,6 +65,19 @@ endif () include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake") include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") +# Reads string from getarch into CMake vars. Format of getarch vars is VARNAME=VALUE +function(ParseGetArchVars GETARCH_IN) + string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" GETARCH_RESULT_LIST "${GETARCH_IN}") + foreach (GETARCH_LINE ${GETARCH_RESULT_LIST}) + # split the line into var and value, then assign the value to a CMake var + string(REGEX MATCHALL "[0-9_a-zA-Z]+" SPLIT_VAR "${GETARCH_LINE}") + list(GET SPLIT_VAR 0 VAR_NAME) + list(GET SPLIT_VAR 1 VAR_VALUE) + message(STATUS "Setting ${VAR_NAME} to ${VAR_VALUE}") + set(${VAR_NAME} ${VAR_VALUE} PARENT_SCOPE) + endforeach () +endfunction () + # compile getarch enable_language(ASM) set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") @@ -85,18 +98,9 @@ execute_process(COMMAND ${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT) #message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}") -# append config data from getarch to the TARGET file +# append config data from getarch to the TARGET file and read in CMake vars file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT}) - -# TODO: make this a function, the exact same code is used again with getarch2 -string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" GETARCH_RESULT_LIST "${GETARCH_MAKE_OUT}") -foreach (GETARCH_LINE ${GETARCH_RESULT_LIST}) - # split the line into var and value, then assign the value to a CMake var - string(REGEX MATCHALL "[0-9_a-zA-Z]+" SPLIT_VAR "${GETARCH_LINE}") - list(GET SPLIT_VAR 0 VAR_NAME) - list(GET SPLIT_VAR 1 VAR_VALUE) - set(${VAR_NAME} ${VAR_VALUE}) -endforeach () +ParseGetArchVars(${GETARCH_MAKE_OUT}) 
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build") set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}") @@ -112,17 +116,7 @@ try_compile(GETARCH2_RESULT ${GETARCH2_DIR} execute_process(COMMAND ${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT) execute_process(COMMAND ${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT) -#message(STATUS "GETARCH_2 results:\n${GETARCH2_MAKE_OUT}") - -# append config data from getarch_2nd to the TARGET file +# append config data from getarch_2nd to the TARGET file and read in CMake vars file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT}) - -string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" GETARCH_RESULT_LIST "${GETARCH2_MAKE_OUT}") -foreach (GETARCH_LINE ${GETARCH_RESULT_LIST}) - # split the line into var and value, then assign the value to a CMake var - string(REGEX MATCHALL "[0-9_a-zA-Z]+" SPLIT_VAR "${GETARCH_LINE}") - list(GET SPLIT_VAR 0 VAR_NAME) - list(GET SPLIT_VAR 1 VAR_VALUE) - set(${VAR_NAME} ${VAR_VALUE}) -endforeach () +ParseGetArchVars(${GETARCH2_MAKE_OUT}) From dbdca7bf0c9a8ae202653a1dab028abeabeab275 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 29 Jan 2015 22:53:11 -0600 Subject: [PATCH 017/137] Added first pass at driver/level3 Makefile conversion. Added a rather convoluted CMake function to find all combinations of a given list. This will be useful for the object files that are compiled multiple times with different combinations of preprocessor definitions. 
--- cmake/prebuild.cmake | 1 - driver/level3/CMakeLists.txt | 106 +++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 driver/level3/CMakeLists.txt diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index ad1a83912..60566e3f2 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -73,7 +73,6 @@ function(ParseGetArchVars GETARCH_IN) string(REGEX MATCHALL "[0-9_a-zA-Z]+" SPLIT_VAR "${GETARCH_LINE}") list(GET SPLIT_VAR 0 VAR_NAME) list(GET SPLIT_VAR 1 VAR_VALUE) - message(STATUS "Setting ${VAR_NAME} to ${VAR_VALUE}") set(${VAR_NAME} ${VAR_VALUE} PARENT_SCOPE) endforeach () endfunction () diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt new file mode 100644 index 000000000..df6445de4 --- /dev/null +++ b/driver/level3/CMakeLists.txt @@ -0,0 +1,106 @@ +include_directories(${CMAKE_SOURCE_DIR}) + +set(USE_GEMM3M 0) + +if (DEFINED ARCH) + if (${ARCH} STREQUAL "x86") + set(USE_GEMM3M 1) + endif () + + if (${ARCH} STREQUAL "x86_64") + set(USE_GEMM3M 1) + endif () + + if (${ARCH} STREQUAL "ia64") + set(USE_GEMM3M 1) + endif () + + if (${ARCH} STREQUAL "MIPS") + set(USE_GEMM3M 1) + endif () +endif () + +# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa + +# loop through gemm.c defines +set(GEMM_DEFINES NN NT TN TT) +foreach (GEMM_DEFINE ${GEMM_DEFINES}) + add_library(GEMM_${GEMM_DEFINE}_OBJS OBJECT gemm.c) + set_target_properties(GEMM_${GEMM_DEFINE}_OBJS PROPERTIES COMPILE_DEFINITIONS ${GEMM_DEFINE}) +endforeach () + +# Returns all combinations of the input list, as a list with colon-separated combinations +# E.g. input of A B C returns A B C A:B A:C B:C +# N.B. The input is meant to be a list, and to past a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")). 
+function(AllCombinations list_in) + list(LENGTH list_in list_count) + set(num_combos 1) + math(EXPR num_combos "${num_combos} << ${list_count}") + set(LIST_OUT "") + foreach (c RANGE ${num_combos}) + set(current_combo "") + # this is a little ridiculous just to iterate through a list w/ indices + math(EXPR last_list_index "${list_count} - 1") + foreach (list_index RANGE 0 ${last_list_index}) + math(EXPR bit "1 << ${list_index}") + math(EXPR combo_has_bit "${c} & ${bit}") + list(GET list_in ${list_index} list_elem) + if (combo_has_bit) + if (current_combo) + set(current_combo "${current_combo}:${list_elem}") + else () + set(current_combo ${list_elem}) + endif () + endif () + endforeach () + list(APPEND LIST_OUT ${current_combo}) + endforeach () + set(LIST_OUT ${LIST_OUT} PARENT_SCOPE) +endfunction () + +# these sources are compiled with combinations of TRANS, UPPER, and UNIT, for 32 combinations total +set(TRM_SOURCES trmm_L.c trmm_R.c trsm_L.c trsm_R.c) +AllCombinations("TRANS UPPER UNIT") +set(TRM_DEFINE_COMBOS LIST_OUT) +message(STATUS "alcombos result: ${LIST_OUT}") +foreach (TRM_SOURCE ${TRM_SOURCES}) + foreach (TRM_DEFINES ${TRM_DEFINE_COMBOS}) + string(REGEX MATCH "[a-z]+_[LR]" TRM_NAME ${TRM_SOURCE}) + string(TOUPPER ${TRM_NAME} TRM_NAME) + # TODO: TRM_DEFINES is a colon-separated list of defines to set for this object - need to parse it and set them using set_target_properties, and also come up with a unique id for the lib name (e.g. 
first letter of each define, so TRANS UPPER UNIT is TUU) + #add_library(${TRM_NAME}_${TRM_DEFINE}_OBJS OBJECT ${TRM_SOURCE}) + #set_target_properties(${TRM_NAME}_${TRM_DEFINE}_OBJS PROPERTIES COMPILE_DEFINITIONS ${TRM_DEFINE}) + endforeach () +endforeach () + +# dsymm_LU.c dsymm_LL.c dsymm_RU.c dsymm_RL.c +# dsyrk_UN.c dsyrk_UT.c dsyrk_LN.c dsyrk_LT.c +# dsyr2k_UN.c dsyr2k_UT.c dsyr2k_LN.c dsyr2k_LT.c +# dsyrk_kernel_U.c dsyrk_kernel_L.c +# dsyr2k_kernel_U.c dsyr2k_kernel_L.c + +#if (SMP) +# +# COMMONOBJS += gemm_thread_m.c gemm_thread_n.c gemm_thread_mn.c gemm_thread_variable.c +# COMMONOBJS += syrk_thread.c +# +# if (USE_SIMPLE_THREADED_LEVEL3) +# DBLASOBJS += dgemm_thread_nn.c dgemm_thread_nt.c dgemm_thread_tn.c dgemm_thread_tt.c +# DBLASOBJS += dsymm_thread_LU.c dsymm_thread_LL.c dsymm_thread_RU.c dsymm_thread_RL.c +# DBLASOBJS += dsyrk_thread_UN.c dsyrk_thread_UT.c dsyrk_thread_LN.c dsyrk_thread_LT.c +# +# endif () +#endif () +# +#HPLOBJS = +# dgemm_nn.c dgemm_nt.c dgemm_tn.c dgemm_tt.c +# dtrsm_LNUU.c dtrsm_LNUN.c dtrsm_LNLU.c dtrsm_LNLN.c +# dtrsm_LTUU.c dtrsm_LTUN.c dtrsm_LTLU.c dtrsm_LTLN.c +# dtrsm_RNUU.c dtrsm_RNUN.c dtrsm_RNLU.c dtrsm_RNLN.c +# dtrsm_RTUU.c dtrsm_RTUN.c dtrsm_RTLU.c dtrsm_RTLN.c +# +#if (USE_SIMPLE_THREADED_LEVEL3) +# HPLOBJS += dgemm_thread_nn.c dgemm_thread_nt.c +# dgemm_thread_tn.c dgemm_thread_tt.c +#endif +# From a6cf8aafc0ad973a0599c939ddcbc138f99a4669 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Fri, 30 Jan 2015 11:21:50 -0600 Subject: [PATCH 018/137] Updated level3/CMakeLists with correct defines using all combos. 
--- driver/level2/CMakeLists.txt | 175 +++++++++++++++++------------------ driver/level3/CMakeLists.txt | 35 +++++-- interface/CMakeLists.txt | 91 +++++++++--------- 3 files changed, 157 insertions(+), 144 deletions(-) diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index c2119bfe1..ff6faab90 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -1,88 +1,87 @@ - -include_directories(${CMAKE_SOURCE_DIR}) - -# sources that need to be compiled twice, once with no flags and once with LOWER -set(UL_SOURCES - sbmv_k.c - spmv_k.c - spr_k.c - spr2_k.c - syr_k.c - syr2_k.c -) - -# sources that need to be compiled several times, for UNIT, TRANS -set(NU_SOURCES - tbmv_U.c - tbsv_U.c - tpmv_U.c - tpsv_U.c - trmv_U.c - trsv_U.c - tbmv_L.c - tbsv_L.c - tpmv_L.c - tpsv_L.c - trmv_L.c - trsv_L.c -) - -# first compile all the objects that don't need specific preprocessor defines -add_library(DBLAS_NONE OBJECT - gbmv_k.c # gbmv_N - ${UL_SOURCES} - ${NU_SOURCES} -) - -# then do objects with transpose/triangular/etc definitions - -# objects that need TRANS set -add_library(DBLAS_T OBJECT gbmv_k.c ${NU_SOURCES}) -set_target_properties(DBLAS_T PROPERTIES COMPILE_DEFINITIONS TRANS) - -# objects that need LOWER set -add_library(DBLAS_L OBJECT ${UL_SOURCES}) -set_target_properties(DBLAS_L PROPERTIES COMPILE_DEFINITIONS LOWER) - -# objects that need UNIT set -add_library(DBLAS_U OBJECT ${NU_SOURCES}) -set_target_properties(DBLAS_U PROPERTIES COMPILE_DEFINITIONS UNIT) - -# objects that need TRANS and UNIT set -add_library(DBLAS_TU OBJECT ${NU_SOURCES}) -set_target_properties(DBLAS_TU PROPERTIES COMPILE_DEFINITIONS UNIT) -set_target_properties(DBLAS_TU PROPERTIES COMPILE_DEFINITIONS TRANS) - -#if (DEFINED SMP) -# add_library(DBLASOBJS_SMP -# dgemv_thread_n.c dgemv_thread_t.c -# dger_thread.c -# dsymv_thread_U.c dsymv_thread_L.c -# dsyr_thread_U.c dsyr_thread_L.c -# dsyr2_thread_U.c dsyr2_thread_L.c -# dspr_thread_U.c dspr_thread_L.c -# 
dspr2_thread_U.c dspr2_thread_L.c -# dtrmv_thread_NUU.c dtrmv_thread_NUN.c -# dtrmv_thread_NLU.c dtrmv_thread_NLN.c -# dtrmv_thread_TUU.c dtrmv_thread_TUN.c -# dtrmv_thread_TLU.c dtrmv_thread_TLN.c -# dspmv_thread_U.c dspmv_thread_L.c -# dtpmv_thread_NUU.c dtpmv_thread_NUN.c -# dtpmv_thread_NLU.c dtpmv_thread_NLN.c -# dtpmv_thread_TUU.c dtpmv_thread_TUN.c -# dtpmv_thread_TLU.c dtpmv_thread_TLN.c -# dgbmv_thread_n.c dgbmv_thread_t.c -# dsbmv_thread_U.c dsbmv_thread_L.c -# dtbmv_thread_NUU.c dtbmv_thread_NUN.c -# dtbmv_thread_NLU.c dtbmv_thread_NLN.c -# dtbmv_thread_TUU.c dtbmv_thread_TUN.c -# dtbmv_thread_TLU.c dtbmv_thread_TLN.c -# ) -#endif () - -set(DBLAS_TARGETS DBLAS_NONE DBLAS_T DBLAS_L DBLAS_U DBLAS_TU) - -foreach (DBLAS_TARGET ${DBLAS_TARGETS}) - set_target_properties(${DBLAS_TARGET} PROPERTIES COMPILE_DEFINITIONS DOUBLE) -endforeach () - + +include_directories(${CMAKE_SOURCE_DIR}) + +# sources that need to be compiled twice, once with no flags and once with LOWER +set(UL_SOURCES + sbmv_k.c + spmv_k.c + spr_k.c + spr2_k.c + syr_k.c + syr2_k.c +) + +# sources that need to be compiled several times, for UNIT, TRANS +set(NU_SOURCES + tbmv_U.c + tbsv_U.c + tpmv_U.c + tpsv_U.c + trmv_U.c + trsv_U.c + tbmv_L.c + tbsv_L.c + tpmv_L.c + tpsv_L.c + trmv_L.c + trsv_L.c +) + +# first compile all the objects that don't need specific preprocessor defines +add_library(DBLAS_NONE OBJECT + gbmv_k.c # gbmv_N + ${UL_SOURCES} + ${NU_SOURCES} +) + +# then do objects with transpose/triangular/etc definitions + +# objects that need TRANS set +add_library(DBLAS_T OBJECT gbmv_k.c ${NU_SOURCES}) +set_target_properties(DBLAS_T PROPERTIES COMPILE_DEFINITIONS "TRANS") + +# objects that need LOWER set +add_library(DBLAS_L OBJECT ${UL_SOURCES}) +set_target_properties(DBLAS_L PROPERTIES COMPILE_DEFINITIONS "LOWER") + +# objects that need UNIT set +add_library(DBLAS_U OBJECT ${NU_SOURCES}) +set_target_properties(DBLAS_U PROPERTIES COMPILE_DEFINITIONS "UNIT") + +# objects that need TRANS and 
UNIT set +add_library(DBLAS_TU OBJECT ${NU_SOURCES}) +set_target_properties(DBLAS_TU PROPERTIES COMPILE_DEFINITIONS "UNIT;TRANS") + +#if (DEFINED SMP) +# add_library(DBLASOBJS_SMP +# dgemv_thread_n.c dgemv_thread_t.c +# dger_thread.c +# dsymv_thread_U.c dsymv_thread_L.c +# dsyr_thread_U.c dsyr_thread_L.c +# dsyr2_thread_U.c dsyr2_thread_L.c +# dspr_thread_U.c dspr_thread_L.c +# dspr2_thread_U.c dspr2_thread_L.c +# dtrmv_thread_NUU.c dtrmv_thread_NUN.c +# dtrmv_thread_NLU.c dtrmv_thread_NLN.c +# dtrmv_thread_TUU.c dtrmv_thread_TUN.c +# dtrmv_thread_TLU.c dtrmv_thread_TLN.c +# dspmv_thread_U.c dspmv_thread_L.c +# dtpmv_thread_NUU.c dtpmv_thread_NUN.c +# dtpmv_thread_NLU.c dtpmv_thread_NLN.c +# dtpmv_thread_TUU.c dtpmv_thread_TUN.c +# dtpmv_thread_TLU.c dtpmv_thread_TLN.c +# dgbmv_thread_n.c dgbmv_thread_t.c +# dsbmv_thread_U.c dsbmv_thread_L.c +# dtbmv_thread_NUU.c dtbmv_thread_NUN.c +# dtbmv_thread_NLU.c dtbmv_thread_NLN.c +# dtbmv_thread_TUU.c dtbmv_thread_TUN.c +# dtbmv_thread_TLU.c dtbmv_thread_TLN.c +# ) +#endif () + +set(DBLAS_TARGETS DBLAS_NONE DBLAS_T DBLAS_L DBLAS_U DBLAS_TU) + +foreach (DBLAS_TARGET ${DBLAS_TARGETS}) + set_target_properties(${DBLAS_TARGET} PROPERTIES COMPILE_DEFINITIONS DOUBLE) +endforeach () + diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index df6445de4..3a282a0ae 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -60,16 +60,31 @@ endfunction () # these sources are compiled with combinations of TRANS, UPPER, and UNIT, for 32 combinations total set(TRM_SOURCES trmm_L.c trmm_R.c trsm_L.c trsm_R.c) -AllCombinations("TRANS UPPER UNIT") -set(TRM_DEFINE_COMBOS LIST_OUT) -message(STATUS "alcombos result: ${LIST_OUT}") -foreach (TRM_SOURCE ${TRM_SOURCES}) - foreach (TRM_DEFINES ${TRM_DEFINE_COMBOS}) - string(REGEX MATCH "[a-z]+_[LR]" TRM_NAME ${TRM_SOURCE}) - string(TOUPPER ${TRM_NAME} TRM_NAME) - # TODO: TRM_DEFINES is a colon-separated list of defines to set for this object - need to parse 
it and set them using set_target_properties, and also come up with a unique id for the lib name (e.g. first letter of each define, so TRANS UPPER UNIT is TUU) - #add_library(${TRM_NAME}_${TRM_DEFINE}_OBJS OBJECT ${TRM_SOURCE}) - #set_target_properties(${TRM_NAME}_${TRM_DEFINE}_OBJS PROPERTIES COMPILE_DEFINITIONS ${TRM_DEFINE}) +AllCombinations("TRANS;UPPER;UNIT") +set(TRM_DEFINE_COMBOS ${LIST_OUT}) +foreach (trm_source ${TRM_SOURCES}) + foreach (trm_defines ${TRM_DEFINE_COMBOS}) + + # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with + string(REPLACE ":" ";" trm_defines ${trm_defines}) + + # build a unique variable name for this obj file by picking two letters from the defines (can't use one in this case) + set(trm_obj_name "") + foreach (trm_define ${trm_defines}) + string(REGEX MATCH "^[A-Z][A-Z]" letter ${trm_define}) + set(trm_obj_name "${trm_obj_name}${letter}") + endforeach () + + # parse file name + string(REGEX MATCH "[a-z]+_[LR]" trm_name ${trm_source}) + string(TOUPPER ${trm_name} trm_name) + + # prepend the uppercased file name to the obj name + set(trm_obj_name "${trm_name}_${trm_obj_name}_OBJS") + + # now add the object and set the defines + add_library(${trm_obj_name} OBJECT ${trm_source}) + set_target_properties(${trm_obj_name} PROPERTIES COMPILE_DEFINITIONS "${trm_defines}") endforeach () endforeach () diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 62a889f49..c38a73f84 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -1,46 +1,45 @@ - -include_directories(${CMAKE_SOURCE_DIR}) - -# TODO: Need to generate object files for S, D, C, Q and X - start with D for now. -# The sources are the same, but there are additional preprocessor definitions depending on the precision (see Makefile.tail). 
- -add_library(DBLAS1OBJS OBJECT - axpy.c swap.c - copy.c scal.c - dot.c - asum.c nrm2.c - max.c # amax/min/amin compiled later from same source - rot.c rotg.c rotm.c rotmg.c - axpby.c -) - -# N.B. The original Makefile passed in -UUSE_MIN and -UUSE_ABS (where appropriate), no way to do that at a source-level in cmake. REMOVE_DEFINITIONS removes a definition for the rest of the compilation. -add_library(AMAX_OBJ OBJECT max.c) -set_target_properties(AMAX_OBJ PROPERTIES COMPILE_DEFINITIONS USE_ABS) -add_library(AMIN_OBJ OBJECT max.c) -set_target_properties(AMIN_OBJ PROPERTIES COMPILE_DEFINITIONS USE_ABS) -set_target_properties(AMIN_OBJ PROPERTIES COMPILE_DEFINITIONS USE_MIN) -add_library(MIN_OBJ OBJECT max.c) -set_target_properties(MIN_OBJ PROPERTIES COMPILE_DEFINITIONS USE_MIN) - -# TODO: USE_NETLIB_GEMV shoudl switch gemv.c to netlib/*gemv.f -add_library(DBLAS2OBJS OBJECT - gemv.c ger.c - trsv.c trmv.c symv.c - syr.c syr2.c gbmv.c - sbmv.c spmv.c - spr.c spr2.c - tbsv.c tbmv.c - tpsv.c tpmv.c -) - -add_library(DBLAS3OBJS OBJECT - gemm.c symm.c - trsm.c syrk.c syr2k.c - omatcopy.c imatcopy.c -) - -# trmm is trsm with a compiler flag set -add_library(TRMM_OBJ OBJECT trsm.c) -set_target_properties(TRMM_OBJ PROPERTIES COMPILE_DEFINITIONS TRMM) - + +include_directories(${CMAKE_SOURCE_DIR}) + +# TODO: Need to generate object files for S, D, C, Q and X - start with D for now. +# The sources are the same, but there are additional preprocessor definitions depending on the precision (see Makefile.tail). + +add_library(DBLAS1OBJS OBJECT + axpy.c swap.c + copy.c scal.c + dot.c + asum.c nrm2.c + max.c # amax/min/amin compiled later from same source + rot.c rotg.c rotm.c rotmg.c + axpby.c +) + +# N.B. The original Makefile passed in -UUSE_MIN and -UUSE_ABS (where appropriate), no way to do that at a source-level in cmake. REMOVE_DEFINITIONS removes a definition for the rest of the compilation. 
+add_library(AMAX_OBJ OBJECT max.c) +set_target_properties(AMAX_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_ABS") +add_library(AMIN_OBJ OBJECT max.c) +set_target_properties(AMIN_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_ABS;USE_MIN") +add_library(MIN_OBJ OBJECT max.c) +set_target_properties(MIN_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_MIN") + +# TODO: USE_NETLIB_GEMV shoudl switch gemv.c to netlib/*gemv.f +add_library(DBLAS2OBJS OBJECT + gemv.c ger.c + trsv.c trmv.c symv.c + syr.c syr2.c gbmv.c + sbmv.c spmv.c + spr.c spr2.c + tbsv.c tbmv.c + tpsv.c tpmv.c +) + +add_library(DBLAS3OBJS OBJECT + gemm.c symm.c + trsm.c syrk.c syr2k.c + omatcopy.c imatcopy.c +) + +# trmm is trsm with a compiler flag set +add_library(TRMM_OBJ OBJECT trsm.c) +set_target_properties(TRMM_OBJ PROPERTIES COMPILE_DEFINITIONS TRMM) + From 8d9b196e0dd3f1230b3f6e610e6a54ead64b514f Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Fri, 30 Jan 2015 12:14:44 -0600 Subject: [PATCH 019/137] Moved loop over define combos into a function. This function takes a set of sources and a set of preprocessor definitions. It will iterate over the sources and build an object file for each combination of preprocessor definitions for each source file. 
--- driver/level3/CMakeLists.txt | 60 ++++++++++++++++++++---------------- driver/others/CMakeLists.txt | 2 ++ 2 files changed, 35 insertions(+), 27 deletions(-) create mode 100644 driver/others/CMakeLists.txt diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 3a282a0ae..9059a46d0 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -58,35 +58,41 @@ function(AllCombinations list_in) set(LIST_OUT ${LIST_OUT} PARENT_SCOPE) endfunction () +# generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in +function(GenerateObjects sources_in defines_in) + AllCombinations("${defines_in}") + set(define_combos ${LIST_OUT}) + foreach (source_file ${sources_in}) + foreach (def_combo ${define_combos}) + + # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with + string(REPLACE ":" ";" def_combo ${def_combo}) + + # build a unique variable name for this obj file by picking two letters from the defines (can't use one in this case) + set(obj_name "") + foreach (combo_elem ${def_combo}) + string(REGEX MATCH "^[A-Z][A-Z]" letter ${combo_elem}) + set(obj_name "${obj_name}${letter}") + endforeach () + + # parse file name + string(REGEX MATCH "[a-z]+_[LR]" source_name ${source_file}) + string(TOUPPER ${source_name} source_name) + + # prepend the uppercased file name to the obj name + set(obj_name "${source_name}_${obj_name}_OBJS") + + # now add the object and set the defines + add_library(${obj_name} OBJECT ${source_file}) + set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${def_combo}") + endforeach () + endforeach () +endfunction () + # these sources are compiled with combinations of TRANS, UPPER, and UNIT, for 32 combinations total set(TRM_SOURCES trmm_L.c trmm_R.c trsm_L.c trsm_R.c) -AllCombinations("TRANS;UPPER;UNIT") -set(TRM_DEFINE_COMBOS ${LIST_OUT}) -foreach (trm_source ${TRM_SOURCES}) - foreach 
(trm_defines ${TRM_DEFINE_COMBOS}) - - # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with - string(REPLACE ":" ";" trm_defines ${trm_defines}) - - # build a unique variable name for this obj file by picking two letters from the defines (can't use one in this case) - set(trm_obj_name "") - foreach (trm_define ${trm_defines}) - string(REGEX MATCH "^[A-Z][A-Z]" letter ${trm_define}) - set(trm_obj_name "${trm_obj_name}${letter}") - endforeach () - - # parse file name - string(REGEX MATCH "[a-z]+_[LR]" trm_name ${trm_source}) - string(TOUPPER ${trm_name} trm_name) - - # prepend the uppercased file name to the obj name - set(trm_obj_name "${trm_name}_${trm_obj_name}_OBJS") - - # now add the object and set the defines - add_library(${trm_obj_name} OBJECT ${trm_source}) - set_target_properties(${trm_obj_name} PROPERTIES COMPILE_DEFINITIONS "${trm_defines}") - endforeach () -endforeach () +set(TRM_DEFINES TRANS UPPER UNIT) +GenerateObjects("${TRM_SOURCES}" "${TRM_DEFINES}") # dsymm_LU.c dsymm_LL.c dsymm_RU.c dsymm_RL.c # dsyrk_UN.c dsyrk_UT.c dsyrk_LN.c dsyrk_LT.c diff --git a/driver/others/CMakeLists.txt b/driver/others/CMakeLists.txt new file mode 100644 index 000000000..2685d79c8 --- /dev/null +++ b/driver/others/CMakeLists.txt @@ -0,0 +1,2 @@ + +# NYI From 7693887d61cfe495ee37a8ed8dbb4eec54d0b3e9 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Fri, 30 Jan 2015 13:01:11 -0600 Subject: [PATCH 020/137] Added empty set to the combinations generated by AllCombinations. 
--- driver/level3/CMakeLists.txt | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 9059a46d0..37a9b1bd5 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -35,9 +35,10 @@ endforeach () function(AllCombinations list_in) list(LENGTH list_in list_count) set(num_combos 1) - math(EXPR num_combos "${num_combos} << ${list_count}") + # subtract 1 since we will iterate from 0 to num_combos + math(EXPR num_combos "(${num_combos} << ${list_count}) - 1") set(LIST_OUT "") - foreach (c RANGE ${num_combos}) + foreach (c RANGE 0 ${num_combos}) set(current_combo "") # this is a little ridiculous just to iterate through a list w/ indices math(EXPR last_list_index "${list_count} - 1") @@ -55,6 +56,7 @@ function(AllCombinations list_in) endforeach () list(APPEND LIST_OUT ${current_combo}) endforeach () + list(APPEND LIST_OUT " ") # Empty set is a valic combination, but CMake isn't appending the empty string for some reason, use a space set(LIST_OUT ${LIST_OUT} PARENT_SCOPE) endfunction () @@ -76,7 +78,7 @@ function(GenerateObjects sources_in defines_in) endforeach () # parse file name - string(REGEX MATCH "[a-z]+_[LR]" source_name ${source_file}) + string(REGEX MATCH "^[a-zA-Z_]+" source_name ${source_file}) string(TOUPPER ${source_name} source_name) # prepend the uppercased file name to the obj name @@ -84,7 +86,9 @@ function(GenerateObjects sources_in defines_in) # now add the object and set the defines add_library(${obj_name} OBJECT ${source_file}) - set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${def_combo}") + if (NOT "${def_combo}" STREQUAL " ") # using space as the empty set + set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${def_combo}") + endif () endforeach () endforeach () endfunction () @@ -94,6 +98,9 @@ set(TRM_SOURCES trmm_L.c trmm_R.c trsm_L.c trsm_R.c) set(TRM_DEFINES TRANS UPPER UNIT) 
GenerateObjects("${TRM_SOURCES}" "${TRM_DEFINES}") +# TODO: also need to set NN for all these objs (add param to GenerateObjects for defines that apply to all +GenerateObjects("symm_k.c" "LOWER;RSIDE") + # dsymm_LU.c dsymm_LL.c dsymm_RU.c dsymm_RL.c # dsyrk_UN.c dsyrk_UT.c dsyrk_LN.c dsyrk_LT.c # dsyr2k_UN.c dsyr2k_UT.c dsyr2k_LN.c dsyr2k_LT.c From e5e7595bf913d249c5dcd1e10cea0d472386ac49 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Fri, 30 Jan 2015 13:31:13 -0600 Subject: [PATCH 021/137] Added paramater to GenerateObjects for defines that affect all sources. --- driver/level3/CMakeLists.txt | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 37a9b1bd5..ef3695e2d 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -61,7 +61,10 @@ function(AllCombinations list_in) endfunction () # generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in -function(GenerateObjects sources_in defines_in) +# @param sources_in the source files to build from +# @param defines_in the preprocessor definitions that will be combined to create the object files +# @param all_defines_in (optional) preprocessor definitions that will be applied to all objects +function(GenerateObjects sources_in defines_in all_defines_in) AllCombinations("${defines_in}") set(define_combos ${LIST_OUT}) foreach (source_file ${sources_in}) @@ -78,7 +81,7 @@ function(GenerateObjects sources_in defines_in) endforeach () # parse file name - string(REGEX MATCH "^[a-zA-Z_]+" source_name ${source_file}) + string(REGEX MATCH "^[a-zA-Z_0-9]+" source_name ${source_file}) string(TOUPPER ${source_name} source_name) # prepend the uppercased file name to the obj name @@ -86,26 +89,23 @@ function(GenerateObjects sources_in defines_in) # now add the object and set the defines add_library(${obj_name} OBJECT ${source_file}) - 
if (NOT "${def_combo}" STREQUAL " ") # using space as the empty set - set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${def_combo}") + set(cur_defines ${def_combo}) + if ("${cur_defines}" STREQUAL " ") + set(cur_defines ${all_defines_in}) + else () + list(APPEND cur_defines ${all_defines_in}) + endif () + if (cur_defines AND NOT "${cur_defines}" STREQUAL " ") # using space as the empty set + set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${cur_defines}") endif () endforeach () endforeach () endfunction () -# these sources are compiled with combinations of TRANS, UPPER, and UNIT, for 32 combinations total -set(TRM_SOURCES trmm_L.c trmm_R.c trsm_L.c trsm_R.c) -set(TRM_DEFINES TRANS UPPER UNIT) -GenerateObjects("${TRM_SOURCES}" "${TRM_DEFINES}") - -# TODO: also need to set NN for all these objs (add param to GenerateObjects for defines that apply to all -GenerateObjects("symm_k.c" "LOWER;RSIDE") - -# dsymm_LU.c dsymm_LL.c dsymm_RU.c dsymm_RL.c -# dsyrk_UN.c dsyrk_UT.c dsyrk_LN.c dsyrk_LT.c -# dsyr2k_UN.c dsyr2k_UT.c dsyr2k_LN.c dsyr2k_LT.c -# dsyrk_kernel_U.c dsyrk_kernel_L.c -# dsyr2k_kernel_U.c dsyr2k_kernel_L.c +GenerateObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "DOUBLE") +GenerateObjects("symm_k.c" "LOWER;RSIDE" "NN;DOUBLE") +GenerateObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "DOUBLE") +GenerateObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "DOUBLE") #if (SMP) # From d3dcdddf7569eac76e580be123c7a59a74f6d81a Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Fri, 30 Jan 2015 13:47:40 -0600 Subject: [PATCH 022/137] Moved functions into util cmake file. 
--- CMakeLists.txt | 1 + cmake/prebuild.cmake | 12 ----- cmake/utils.cmake | 87 ++++++++++++++++++++++++++++++++++++ driver/level3/CMakeLists.txt | 73 ------------------------------ 4 files changed, 88 insertions(+), 85 deletions(-) create mode 100644 cmake/utils.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 25b88d565..be52d9713 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,6 +13,7 @@ project(OpenBLAS) message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with.") +include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake") include("${CMAKE_SOURCE_DIR}/cmake/system.cmake") set(BLASDIRS interface driver/level2 driver/level3 driver/others) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 60566e3f2..9595dab0d 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -65,18 +65,6 @@ endif () include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake") include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") -# Reads string from getarch into CMake vars. Format of getarch vars is VARNAME=VALUE -function(ParseGetArchVars GETARCH_IN) - string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" GETARCH_RESULT_LIST "${GETARCH_IN}") - foreach (GETARCH_LINE ${GETARCH_RESULT_LIST}) - # split the line into var and value, then assign the value to a CMake var - string(REGEX MATCHALL "[0-9_a-zA-Z]+" SPLIT_VAR "${GETARCH_LINE}") - list(GET SPLIT_VAR 0 VAR_NAME) - list(GET SPLIT_VAR 1 VAR_VALUE) - set(${VAR_NAME} ${VAR_VALUE} PARENT_SCOPE) - endforeach () -endfunction () - # compile getarch enable_language(ASM) set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") diff --git a/cmake/utils.cmake b/cmake/utils.cmake new file mode 100644 index 000000000..a95695553 --- /dev/null +++ b/cmake/utils.cmake @@ -0,0 +1,87 @@ +# Functions to help with the OpenBLAS build + +# Reads string from getarch into CMake vars. 
Format of getarch vars is VARNAME=VALUE +function(ParseGetArchVars GETARCH_IN) + string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" GETARCH_RESULT_LIST "${GETARCH_IN}") + foreach (GETARCH_LINE ${GETARCH_RESULT_LIST}) + # split the line into var and value, then assign the value to a CMake var + string(REGEX MATCHALL "[0-9_a-zA-Z]+" SPLIT_VAR "${GETARCH_LINE}") + list(GET SPLIT_VAR 0 VAR_NAME) + list(GET SPLIT_VAR 1 VAR_VALUE) + set(${VAR_NAME} ${VAR_VALUE} PARENT_SCOPE) + endforeach () +endfunction () + +# Returns all combinations of the input list, as a list with colon-separated combinations +# E.g. input of A B C returns A B C A:B A:C B:C +# N.B. The input is meant to be a list, and to past a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")). +function(AllCombinations list_in) + list(LENGTH list_in list_count) + set(num_combos 1) + # subtract 1 since we will iterate from 0 to num_combos + math(EXPR num_combos "(${num_combos} << ${list_count}) - 1") + set(LIST_OUT "") + foreach (c RANGE 0 ${num_combos}) + set(current_combo "") + # this is a little ridiculous just to iterate through a list w/ indices + math(EXPR last_list_index "${list_count} - 1") + foreach (list_index RANGE 0 ${last_list_index}) + math(EXPR bit "1 << ${list_index}") + math(EXPR combo_has_bit "${c} & ${bit}") + list(GET list_in ${list_index} list_elem) + if (combo_has_bit) + if (current_combo) + set(current_combo "${current_combo}:${list_elem}") + else () + set(current_combo ${list_elem}) + endif () + endif () + endforeach () + list(APPEND LIST_OUT ${current_combo}) + endforeach () + list(APPEND LIST_OUT " ") # Empty set is a valic combination, but CMake isn't appending the empty string for some reason, use a space + set(LIST_OUT ${LIST_OUT} PARENT_SCOPE) +endfunction () + +# generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in +# @param sources_in the source files to build from +# @param defines_in 
the preprocessor definitions that will be combined to create the object files +# @param all_defines_in (optional) preprocessor definitions that will be applied to all objects +function(GenerateObjects sources_in defines_in all_defines_in) + AllCombinations("${defines_in}") + set(define_combos ${LIST_OUT}) + foreach (source_file ${sources_in}) + foreach (def_combo ${define_combos}) + + # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with + string(REPLACE ":" ";" def_combo ${def_combo}) + + # build a unique variable name for this obj file by picking two letters from the defines (can't use one in this case) + set(obj_name "") + foreach (combo_elem ${def_combo}) + string(REGEX MATCH "^[A-Z][A-Z]" letter ${combo_elem}) + set(obj_name "${obj_name}${letter}") + endforeach () + + # parse file name + string(REGEX MATCH "^[a-zA-Z_0-9]+" source_name ${source_file}) + string(TOUPPER ${source_name} source_name) + + # prepend the uppercased file name to the obj name + set(obj_name "${source_name}_${obj_name}_OBJS") + + # now add the object and set the defines + add_library(${obj_name} OBJECT ${source_file}) + set(cur_defines ${def_combo}) + if ("${cur_defines}" STREQUAL " ") + set(cur_defines ${all_defines_in}) + else () + list(APPEND cur_defines ${all_defines_in}) + endif () + if (cur_defines AND NOT "${cur_defines}" STREQUAL " ") # using space as the empty set + set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${cur_defines}") + endif () + endforeach () + endforeach () +endfunction () + diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index ef3695e2d..2b5c18007 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -29,79 +29,6 @@ foreach (GEMM_DEFINE ${GEMM_DEFINES}) set_target_properties(GEMM_${GEMM_DEFINE}_OBJS PROPERTIES COMPILE_DEFINITIONS ${GEMM_DEFINE}) endforeach () -# Returns all combinations of the input list, as a list with colon-separated 
combinations -# E.g. input of A B C returns A B C A:B A:C B:C -# N.B. The input is meant to be a list, and to past a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")). -function(AllCombinations list_in) - list(LENGTH list_in list_count) - set(num_combos 1) - # subtract 1 since we will iterate from 0 to num_combos - math(EXPR num_combos "(${num_combos} << ${list_count}) - 1") - set(LIST_OUT "") - foreach (c RANGE 0 ${num_combos}) - set(current_combo "") - # this is a little ridiculous just to iterate through a list w/ indices - math(EXPR last_list_index "${list_count} - 1") - foreach (list_index RANGE 0 ${last_list_index}) - math(EXPR bit "1 << ${list_index}") - math(EXPR combo_has_bit "${c} & ${bit}") - list(GET list_in ${list_index} list_elem) - if (combo_has_bit) - if (current_combo) - set(current_combo "${current_combo}:${list_elem}") - else () - set(current_combo ${list_elem}) - endif () - endif () - endforeach () - list(APPEND LIST_OUT ${current_combo}) - endforeach () - list(APPEND LIST_OUT " ") # Empty set is a valic combination, but CMake isn't appending the empty string for some reason, use a space - set(LIST_OUT ${LIST_OUT} PARENT_SCOPE) -endfunction () - -# generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in -# @param sources_in the source files to build from -# @param defines_in the preprocessor definitions that will be combined to create the object files -# @param all_defines_in (optional) preprocessor definitions that will be applied to all objects -function(GenerateObjects sources_in defines_in all_defines_in) - AllCombinations("${defines_in}") - set(define_combos ${LIST_OUT}) - foreach (source_file ${sources_in}) - foreach (def_combo ${define_combos}) - - # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with - string(REPLACE ":" ";" def_combo ${def_combo}) - - # build a unique variable name for 
this obj file by picking two letters from the defines (can't use one in this case) - set(obj_name "") - foreach (combo_elem ${def_combo}) - string(REGEX MATCH "^[A-Z][A-Z]" letter ${combo_elem}) - set(obj_name "${obj_name}${letter}") - endforeach () - - # parse file name - string(REGEX MATCH "^[a-zA-Z_0-9]+" source_name ${source_file}) - string(TOUPPER ${source_name} source_name) - - # prepend the uppercased file name to the obj name - set(obj_name "${source_name}_${obj_name}_OBJS") - - # now add the object and set the defines - add_library(${obj_name} OBJECT ${source_file}) - set(cur_defines ${def_combo}) - if ("${cur_defines}" STREQUAL " ") - set(cur_defines ${all_defines_in}) - else () - list(APPEND cur_defines ${all_defines_in}) - endif () - if (cur_defines AND NOT "${cur_defines}" STREQUAL " ") # using space as the empty set - set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${cur_defines}") - endif () - endforeach () - endforeach () -endfunction () - GenerateObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "DOUBLE") GenerateObjects("symm_k.c" "LOWER;RSIDE" "NN;DOUBLE") GenerateObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "DOUBLE") From 3e8ea7a351fa3903dc64340a3ed4597829182ce0 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Fri, 30 Jan 2015 14:06:14 -0600 Subject: [PATCH 023/137] Added COMMONOBJS to driver/others CMakeLists.txt. 
--- driver/others/CMakeLists.txt | 74 +++++++++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 2 deletions(-) diff --git a/driver/others/CMakeLists.txt b/driver/others/CMakeLists.txt index 2685d79c8..57b551a75 100644 --- a/driver/others/CMakeLists.txt +++ b/driver/others/CMakeLists.txt @@ -1,2 +1,72 @@ - -# NYI +include_directories(${CMAKE_SOURCE_DIR}) + +if (${CORE} STREQUAL "PPC440") + set(MEMORY memory_qalloc.c) +else () + set(MEMORY memory.c) +endif () + +add_library(COMMONOBJS OBJECT + ${MEMORY} + xerbla.c + abs.c # TODO: this is split into c_abs (DOUBLE unset) and z_abs (DOUBLE set) in the Makefile + openblas_set_num_threads.c + openblas_get_config.c + openblas_get_parallel.c + openblas_error_handle.c +) + +#ifdef SMP +#COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) +#ifndef NO_AFFINITY +#COMMONOBJS += init.$(SUFFIX) +#endif +#endif +# +#ifeq ($(DYNAMIC_ARCH), 1) +#COMMONOBJS += dynamic.$(SUFFIX) +#else +#COMMONOBJS += parameter.$(SUFFIX) +#endif +# +#ifdef EXPRECISION +#COMMONOBJS += x_abs.$(SUFFIX) qlamch.$(SUFFIX) qlamc3.$(SUFFIX) +#endif +# +#ifdef QUAD_PRECISION +#COMMONOBJS += addx.$(SUFFIX) mulx.$(SUFFIX) +#endif +# +#ifdef USE_CUDA +#COMMONOBJS += cuda_init.$(SUFFIX) +#endif +# +#ifdef FUNCTION_PROFILE +#COMMONOBJS += profile.$(SUFFIX) +#endif +# +#LIBOTHERS = libothers.$(LIBSUFFIX) +# +#ifeq ($(USE_OPENMP), 1) +#BLAS_SERVER = blas_server_omp.c +#else +#ifeq ($(OSNAME), WINNT) +#BLAS_SERVER = blas_server_win32.c +#endif +#ifeq ($(OSNAME), CYGWIN_NT) +#BLAS_SERVER = blas_server_win32.c +#endif +#ifeq ($(OSNAME), Interix) +#BLAS_SERVER = blas_server_win32.c +#endif +#endif +# +#ifndef BLAS_SERVER +#BLAS_SERVER = blas_server.c +#endif +# +#ifeq ($(DYNAMIC_ARCH), 1) +#HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) +#else +#HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) +#endif From 5057a4b4dfc29f40bff0b025ff02f301ffaabdcd Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Fri, 30 
Jan 2015 15:21:21 -0600 Subject: [PATCH 024/137] Added openblas add_library call that uses DBLAS_OBJS ojbects. --- CMakeLists.txt | 8 ++++++++ cmake/utils.cmake | 3 +++ driver/level2/CMakeLists.txt | 7 ++----- driver/level3/CMakeLists.txt | 9 +++++++++ driver/others/CMakeLists.txt | 2 +- interface/CMakeLists.txt | 5 ++++- 6 files changed, 27 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index be52d9713..246ad3097 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -53,10 +53,18 @@ if (${NO_STATIC} AND ${NO_SHARED}) message(FATAL_ERROR "Neither static nor shared are enabled.") endif () +set(DBLAS_OBJS "") foreach (BLAS_DIR ${BLASDIRS}) add_subdirectory(${BLAS_DIR}) endforeach () +# get obj vars into format that add_library likes: $ (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html) +set(TARGET_OBJS "") +foreach (DBLAS_OBJ ${DBLAS_OBJS}) + list(APPEND TARGET_OBJS "$") +endforeach () +add_library(openblas ${TARGET_OBJS}) + #Save the config files for installation # @cp Makefile.conf Makefile.conf_last # @cp config.h config_last.h diff --git a/cmake/utils.cmake b/cmake/utils.cmake index a95695553..944e24cc4 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -50,6 +50,7 @@ endfunction () function(GenerateObjects sources_in defines_in all_defines_in) AllCombinations("${defines_in}") set(define_combos ${LIST_OUT}) + set(OBJ_LIST_OUT "") foreach (source_file ${sources_in}) foreach (def_combo ${define_combos}) @@ -81,7 +82,9 @@ function(GenerateObjects sources_in defines_in all_defines_in) if (cur_defines AND NOT "${cur_defines}" STREQUAL " ") # using space as the empty set set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${cur_defines}") endif () + list(APPEND OBJ_LIST_OUT ${obj_name}) endforeach () endforeach () + set(OBJ_LIST_OUT ${OBJ_LIST_OUT} PARENT_SCOPE) endfunction () diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index ff6faab90..1fbf7c729 100644 --- 
a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -79,9 +79,6 @@ set_target_properties(DBLAS_TU PROPERTIES COMPILE_DEFINITIONS "UNIT;TRANS") # ) #endif () -set(DBLAS_TARGETS DBLAS_NONE DBLAS_T DBLAS_L DBLAS_U DBLAS_TU) - -foreach (DBLAS_TARGET ${DBLAS_TARGETS}) - set_target_properties(${DBLAS_TARGET} PROPERTIES COMPILE_DEFINITIONS DOUBLE) -endforeach () +list(APPEND DBLAS_OBJS "DBLAS_NONE;DBLAS_T;DBLAS_L;DBLAS_U;DBLAS_TU") +set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 2b5c18007..c6f008baa 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -27,12 +27,18 @@ set(GEMM_DEFINES NN NT TN TT) foreach (GEMM_DEFINE ${GEMM_DEFINES}) add_library(GEMM_${GEMM_DEFINE}_OBJS OBJECT gemm.c) set_target_properties(GEMM_${GEMM_DEFINE}_OBJS PROPERTIES COMPILE_DEFINITIONS ${GEMM_DEFINE}) + list(APPEND DBLAS_OBJS GEMM_${GEMM_DEFINE}_OBJS) endforeach () + GenerateObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "DOUBLE") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateObjects("symm_k.c" "LOWER;RSIDE" "NN;DOUBLE") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "DOUBLE") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "DOUBLE") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) #if (SMP) # @@ -59,3 +65,6 @@ GenerateObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "DOUBLE") # dgemm_thread_tn.c dgemm_thread_tt.c #endif # + +set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS + diff --git a/driver/others/CMakeLists.txt b/driver/others/CMakeLists.txt index 57b551a75..10af485e9 100644 --- a/driver/others/CMakeLists.txt +++ b/driver/others/CMakeLists.txt @@ -6,7 +6,7 @@ else () set(MEMORY memory.c) endif () -add_library(COMMONOBJS OBJECT +add_library(COMMON_OBJS OBJECT ${MEMORY} xerbla.c abs.c 
# TODO: this is split into c_abs (DOUBLE unset) and z_abs (DOUBLE set) in the Makefile diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index c38a73f84..c8ea1cad6 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -41,5 +41,8 @@ add_library(DBLAS3OBJS OBJECT # trmm is trsm with a compiler flag set add_library(TRMM_OBJ OBJECT trsm.c) -set_target_properties(TRMM_OBJ PROPERTIES COMPILE_DEFINITIONS TRMM) +set_target_properties(TRMM_OBJ PROPERTIES COMPILE_DEFINITIONS "TRMM") + +list(APPEND DBLAS_OBJS "DBLAS1OBJS;AMAX_OBJ;AMIN_OBJ;MIN_OBJ;DBLAS2OBJS;DBLAS3OBJS;TRMM_OBJ") +set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS From 9e154aba58cb7efa00af1bfd4331ba22c02f9ce6 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Mon, 2 Feb 2015 12:31:15 -0600 Subject: [PATCH 025/137] Added LAPACK object files to interface CMakeLists. --- interface/CMakeLists.txt | 86 +++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index c8ea1cad6..79b3b3c09 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -1,48 +1,60 @@ include_directories(${CMAKE_SOURCE_DIR}) -# TODO: Need to generate object files for S, D, C, Q and X - start with D for now. -# The sources are the same, but there are additional preprocessor definitions depending on the precision (see Makefile.tail). +if (NOT DEFINED NO_CBLAS) + # TODO: Need to generate object files for S, D, C, Q and X - start with D for now. + # The sources are the same, but there are additional preprocessor definitions depending on the precision (see Makefile.tail). 
-add_library(DBLAS1OBJS OBJECT - axpy.c swap.c - copy.c scal.c - dot.c - asum.c nrm2.c - max.c # amax/min/amin compiled later from same source - rot.c rotg.c rotm.c rotmg.c - axpby.c -) + add_library(DBLAS1OBJS OBJECT + axpy.c swap.c + copy.c scal.c + dot.c + asum.c nrm2.c + max.c # amax/min/amin compiled later from same source + rot.c rotg.c rotm.c rotmg.c + axpby.c + ) -# N.B. The original Makefile passed in -UUSE_MIN and -UUSE_ABS (where appropriate), no way to do that at a source-level in cmake. REMOVE_DEFINITIONS removes a definition for the rest of the compilation. -add_library(AMAX_OBJ OBJECT max.c) -set_target_properties(AMAX_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_ABS") -add_library(AMIN_OBJ OBJECT max.c) -set_target_properties(AMIN_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_ABS;USE_MIN") -add_library(MIN_OBJ OBJECT max.c) -set_target_properties(MIN_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_MIN") + # N.B. The original Makefile passed in -UUSE_MIN and -UUSE_ABS (where appropriate), no way to do that at a source-level in cmake. REMOVE_DEFINITIONS removes a definition for the rest of the compilation. 
+ add_library(AMAX_OBJ OBJECT max.c) + set_target_properties(AMAX_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_ABS") + add_library(AMIN_OBJ OBJECT max.c) + set_target_properties(AMIN_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_ABS;USE_MIN") + add_library(MIN_OBJ OBJECT max.c) + set_target_properties(MIN_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_MIN") -# TODO: USE_NETLIB_GEMV shoudl switch gemv.c to netlib/*gemv.f -add_library(DBLAS2OBJS OBJECT - gemv.c ger.c - trsv.c trmv.c symv.c - syr.c syr2.c gbmv.c - sbmv.c spmv.c - spr.c spr2.c - tbsv.c tbmv.c - tpsv.c tpmv.c -) + # TODO: USE_NETLIB_GEMV shoudl switch gemv.c to netlib/*gemv.f + add_library(DBLAS2OBJS OBJECT + gemv.c ger.c + trsv.c trmv.c symv.c + syr.c syr2.c gbmv.c + sbmv.c spmv.c + spr.c spr2.c + tbsv.c tbmv.c + tpsv.c tpmv.c + ) -add_library(DBLAS3OBJS OBJECT - gemm.c symm.c - trsm.c syrk.c syr2k.c - omatcopy.c imatcopy.c -) + add_library(DBLAS3OBJS OBJECT + gemm.c symm.c + trsm.c syrk.c syr2k.c + omatcopy.c imatcopy.c + ) -# trmm is trsm with a compiler flag set -add_library(TRMM_OBJ OBJECT trsm.c) -set_target_properties(TRMM_OBJ PROPERTIES COMPILE_DEFINITIONS "TRMM") + # trmm is trsm with a compiler flag set + add_library(TRMM_OBJ OBJECT trsm.c) + set_target_properties(TRMM_OBJ PROPERTIES COMPILE_DEFINITIONS "TRMM") + + list(APPEND DBLAS_OBJS "DBLAS1OBJS;DBLAS2OBJS;DBLAS3OBJS;AMAX_OBJ;AMIN_OBJ;MIN_OBJ;TRMM_OBJ") +endif () + +if (NOT DEFINED NO_LAPACK) + add_library(DLAPACK_OBJS OBJECT + lapack/getrf.c lapack/getrs.c lapack/potrf.c lapack/getf2.c + lapack/potf2.c lapack/laswp.c lapack/gesv.c lapack/lauu2.c + lapack/lauum.c lapack/trti2.c lapack/trtri.c + ) + list(APPEND DBLAS_OBJS "DLAPACK_OBJS") +endif () -list(APPEND DBLAS_OBJS "DBLAS1OBJS;AMAX_OBJ;AMIN_OBJ;MIN_OBJ;DBLAS2OBJS;DBLAS3OBJS;TRMM_OBJ") set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS From d11bde60d0ec4f2d597cd5493aad4f8e44b5ecff Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Mon, 2 Feb 2015 15:00:44 -0600 
Subject: [PATCH 026/137] DOUBLE define for DBLAS objects is now set in main CMakeLists.txt. Since the objects are the same, could generate SINGLE/COMPLEX/etc here without having to rewrite all the object enumeration code again. --- CMakeLists.txt | 5 ++++- driver/level3/CMakeLists.txt | 8 ++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 246ad3097..bf1563d0b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,7 +39,7 @@ set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench) # all :: libs netlib tests shared -# libs: +# libs : if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.") endif () @@ -61,10 +61,13 @@ endforeach () # get obj vars into format that add_library likes: $ (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html) set(TARGET_OBJS "") foreach (DBLAS_OBJ ${DBLAS_OBJS}) + get_target_property(PREV_DEFS ${DBLAS_OBJ} COMPILE_DEFINITIONS) + set_target_properties(${DBLAS_OBJ} PROPERTIES COMPILE_DEFINITIONS "${PREV_DEFS};DOUBLE") list(APPEND TARGET_OBJS "$") endforeach () add_library(openblas ${TARGET_OBJS}) +# TODO: Why is the config saved here? Is this necessary with CMake? 
#Save the config files for installation # @cp Makefile.conf Makefile.conf_last # @cp config.h config_last.h diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index c6f008baa..57865d18b 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -31,13 +31,13 @@ foreach (GEMM_DEFINE ${GEMM_DEFINES}) endforeach () -GenerateObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "DOUBLE") +GenerateObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateObjects("symm_k.c" "LOWER;RSIDE" "NN;DOUBLE") +GenerateObjects("symm_k.c" "LOWER;RSIDE" "NN") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "DOUBLE") +GenerateObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "DOUBLE") +GenerateObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) #if (SMP) From 7194424fef52a7f93d2fd0ae5e5de8488749e7e6 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Mon, 2 Feb 2015 15:21:29 -0600 Subject: [PATCH 027/137] Added missing common objects to the library. 
--- driver/others/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/driver/others/CMakeLists.txt b/driver/others/CMakeLists.txt index 10af485e9..a28cf1e79 100644 --- a/driver/others/CMakeLists.txt +++ b/driver/others/CMakeLists.txt @@ -44,7 +44,9 @@ add_library(COMMON_OBJS OBJECT #ifdef FUNCTION_PROFILE #COMMONOBJS += profile.$(SUFFIX) #endif -# + +list(APPEND DBLAS_OBJS "COMMON_OBJS") + #LIBOTHERS = libothers.$(LIBSUFFIX) # #ifeq ($(USE_OPENMP), 1) @@ -70,3 +72,6 @@ add_library(COMMON_OBJS OBJECT #else #HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) #endif + +set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS + From 20e593a44ae02882119991f9b3e7fd493f8ab6e1 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Mon, 2 Feb 2015 16:25:30 -0600 Subject: [PATCH 028/137] Added cblas_ objects to interface CMakeLists. Naming isn't right, though, not seeing cblas_xxxx exports in the resulting library. --- interface/CMakeLists.txt | 81 ++++++++++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 29 deletions(-) diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 79b3b3c09..6ef498cb5 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -1,19 +1,34 @@ include_directories(${CMAKE_SOURCE_DIR}) -if (NOT DEFINED NO_CBLAS) - # TODO: Need to generate object files for S, D, C, Q and X - start with D for now. - # The sources are the same, but there are additional preprocessor definitions depending on the precision (see Makefile.tail). 
+set(BLAS1_SOURCES + axpy.c swap.c + copy.c scal.c + dot.c + asum.c nrm2.c + rot.c rotg.c rotm.c rotmg.c + axpby.c +) - add_library(DBLAS1OBJS OBJECT - axpy.c swap.c - copy.c scal.c - dot.c - asum.c nrm2.c - max.c # amax/min/amin compiled later from same source - rot.c rotg.c rotm.c rotmg.c - axpby.c - ) +# TODO: USE_NETLIB_GEMV shoudl switch gemv.c to netlib/*gemv.f +set(BLAS2_SOURCES + gemv.c ger.c + trsv.c trmv.c symv.c + syr.c syr2.c gbmv.c + sbmv.c spmv.c + spr.c spr2.c + tbsv.c tbmv.c + tpsv.c tpmv.c +) + +set(BLAS3_SOURCES + gemm.c symm.c + trsm.c syrk.c syr2k.c + omatcopy.c imatcopy.c +) + + +if (NOT DEFINED NO_FBLAS) # N.B. The original Makefile passed in -UUSE_MIN and -UUSE_ABS (where appropriate), no way to do that at a source-level in cmake. REMOVE_DEFINITIONS removes a definition for the rest of the compilation. add_library(AMAX_OBJ OBJECT max.c) @@ -22,29 +37,37 @@ if (NOT DEFINED NO_CBLAS) set_target_properties(AMIN_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_ABS;USE_MIN") add_library(MIN_OBJ OBJECT max.c) set_target_properties(MIN_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_MIN") + add_library(MAX_OBJ OBJECT max.c) - # TODO: USE_NETLIB_GEMV shoudl switch gemv.c to netlib/*gemv.f - add_library(DBLAS2OBJS OBJECT - gemv.c ger.c - trsv.c trmv.c symv.c - syr.c syr2.c gbmv.c - sbmv.c spmv.c - spr.c spr2.c - tbsv.c tbmv.c - tpsv.c tpmv.c - ) - - add_library(DBLAS3OBJS OBJECT - gemm.c symm.c - trsm.c syrk.c syr2k.c - omatcopy.c imatcopy.c - ) + add_library(DBLAS1OBJS OBJECT ${BLAS1_SOURCES}) + add_library(DBLAS2OBJS OBJECT ${BLAS2_SOURCES}) + add_library(DBLAS3OBJS OBJECT ${BLAS3_SOURCES}) # trmm is trsm with a compiler flag set add_library(TRMM_OBJ OBJECT trsm.c) set_target_properties(TRMM_OBJ PROPERTIES COMPILE_DEFINITIONS "TRMM") - list(APPEND DBLAS_OBJS "DBLAS1OBJS;DBLAS2OBJS;DBLAS3OBJS;AMAX_OBJ;AMIN_OBJ;MIN_OBJ;TRMM_OBJ") + list(APPEND DBLAS_OBJS "DBLAS1OBJS;DBLAS2OBJS;DBLAS3OBJS;AMAX_OBJ;AMIN_OBJ;MIN_OBJ;MAX_OBJ;TRMM_OBJ") +endif () + +if (NOT DEFINED 
NO_CBLAS) + + add_library(ISAMAX_OBJ OBJECT imax.c) + set_target_properties(ISAMAX_OBJ PROPERTIES COMPILE_DEFINITIONS "CBLAS;USE_ABS") + + add_library(CDBLAS1_OBJS OBJECT ${BLAS1_SOURCES}) + add_library(CDBLAS2_OBJS OBJECT ${BLAS2_SOURCES}) + add_library(CDBLAS3_OBJS OBJECT ${BLAS3_SOURCES}) + + # trmm is trsm with a compiler flag set + add_library(CTRMM_OBJ OBJECT trsm.c) + set_target_properties(CTRMM_OBJ PROPERTIES COMPILE_DEFINITIONS "CBLAS;TRMM") + + set_target_properties(CDBLAS1_OBJS PROPERTIES COMPILE_DEFINITIONS "CBLAS") + set_target_properties(CDBLAS2_OBJS PROPERTIES COMPILE_DEFINITIONS "CBLAS") + set_target_properties(CDBLAS3_OBJS PROPERTIES COMPILE_DEFINITIONS "CBLAS") + + list(APPEND DBLAS_OBJS "CDBLAS1_OBJS;CDBLAS2_OBJS;CDBLAS3_OBJS;ISAMAX_OBJ;CTRMM_OBJ") endif () if (NOT DEFINED NO_LAPACK) From 31cf22cb4b8b7bbff1d69e1f4d2928002a2dc727 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 3 Feb 2015 11:07:58 -0600 Subject: [PATCH 029/137] Ported OS settings from Makefile.system into new cmake file. --- cmake/os.cmake | 104 +++++++++++++++++++++++++++++++++++++++++++++ cmake/system.cmake | 6 ++- 2 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 cmake/os.cmake diff --git a/cmake/os.cmake b/cmake/os.cmake new file mode 100644 index 000000000..d897a2506 --- /dev/null +++ b/cmake/os.cmake @@ -0,0 +1,104 @@ +## +## Author: Hank Anderson +## Created: 12/29/14 +## Last Modified: 12/29/14 +## Description: Ported from portion of OpenBLAS/Makefile.system +## Detects the OS and sets appropriate variables. 
+ +if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") + set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var + set(MD5SUM "md5 -r") +endif () + +if (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD") + set(MD5SUM "md5 -r") +endif () + +if (${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD") + set(MD5SUM "md5 -n") +endif () + +if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + set(EXTRALIB "${EXTRALIB} -lm") + set(NO_EXPRECISION 1) +endif () + +if (${CMAKE_SYSTEM_NAME} STREQUAL "AIX") + set(EXTRALIB "${EXTRALIB} -lm") +endif () + +# TODO: this is probably meant for mingw, not other windows compilers +if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + + set(NEED_PIC 0) + set(NO_EXPRECISION 1) + + set(EXTRALIB "${EXTRALIB} -defaultlib:advapi32") + + # probably not going to use these + set(SUFFIX "obj") + set(PSUFFIX "pobj") + set(LIBSUFFIX "a") + + if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang") + set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI") + endif () + + if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") + + # Test for supporting MS_ABI + # removed string parsing in favor of CMake's version comparison -hpa + execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) + if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7) + # GCC Version >=4.7 + # It is compatible with MSVC ABI. 
+ set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI") + endif () + endif () + + # Ensure the correct stack alignment on Win32 + # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97 + if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86") + set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2") + set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2") + endif () + +endif () + +if (${CMAKE_SYSTEM_NAME} STREQUAL "Interix") + set(NEED_PIC 0) + set(NO_EXPRECISION 1) + + set(INTERIX_TOOL_DIR STREQUAL "/opt/gcc.3.3/i586-pc-interix3/bin") +endif () + +if (CYGWIN) + set(NEED_PIC 0) + set(NO_EXPRECISION 1) +endif + +if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix") + if (SMP) + set(EXTRALIB "${EXTRALIB} -lpthread") + endif () +endif () + +if (QUAD_PRECISION) + set(CCOMMON_OPT "${CCOMMON_OPT} -DQUAD_PRECISION") + set(NO_EXPRECISION 1) +endif () + +if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86") + set(NO_EXPRECISION 1) +endif () + +if (UTEST_CHECK) + set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK") + set(SANITY_CHECK 1) +endif () + +if (SANITY_CHECK) + # TODO: need some way to get $(*F) (target filename) + set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK -DREFNAME=$(*F)f${BU}") +endif + diff --git a/cmake/system.cmake b/cmake/system.cmake index 0753ed028..1d9c4612d 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -128,5 +128,9 @@ set(DLLWRAP "$(CROSS_SUFFIX)dllwrap") set(OBJCOPY "$(CROSS_SUFFIX)objcopy") set(OBJCONV "$(CROSS_SUFFIX)objconv") -# TODO: convert rest of Makefile.system, left off at "OS dependent settings" + +# +# OS dependent settings +# +include("${CMAKE_SOURCE_DIR}/cmake/os.cmake") From e66aa5f3b7bb58fe7e3b94461978d317deba3e39 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 3 Feb 2015 11:32:20 -0600 Subject: [PATCH 030/137] Ported arch dependent settings from Makefile.system to new cmake file. 
--- CMakeLists.txt | 3 -- cmake/arch.cmake | 115 +++++++++++++++++++++++++++++++++++++++++++ cmake/c_check.cmake | 3 -- cmake/os.cmake | 6 +-- cmake/prebuild.cmake | 3 -- cmake/system.cmake | 8 +-- 6 files changed, 122 insertions(+), 16 deletions(-) create mode 100644 cmake/arch.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index bf1563d0b..66292940d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,5 @@ ## ## Author: Hank Anderson -## Copyright: (c) Stat-Ease, Inc. -## Created: 12/23/14 -## Last Modified: 12/23/14 ## cmake_minimum_required(VERSION 2.8.4) diff --git a/cmake/arch.cmake b/cmake/arch.cmake new file mode 100644 index 000000000..9b459ae90 --- /dev/null +++ b/cmake/arch.cmake @@ -0,0 +1,115 @@ +## +## Author: Hank Anderson +## Description: Ported from portion of OpenBLAS/Makefile.system +## Sets various variables based on architecture. + +if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64") + + if (${ARCH} STREQUAL "x86") + if (NOT BINARY) + set(NO_BINARY_MODE 1) + endif () + endif () + + if (NOT NO_EXPRECISION) + if (${Fortran_COMPILER_NAME} MATCHES "gfortran.*") + # N.B. 
I'm not sure if CMake differentiates between GCC and LSB -hpa + if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") + set(EXPRECISION 1) + set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double") + set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") + endif () + if (${CMAKE_C_COMPILER} STREQUAL "Clang") + set(EXPRECISION 1) + set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION") + set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") + endif () + endif () + endif () +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "Intel") + set(CCOMMON_OPT "${CCOMMON_OPT} -wd981") +endif () + +if (USE_OPENMP) + + if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") + set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "Clang") + message(WARNING "Clang doesn't support OpenMP yet.") + set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "Intel") + set(CCOMMON_OPT "${CCOMMON_OPT} -openmp") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "PGI") + set(CCOMMON_OPT "${CCOMMON_OPT} -mp") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") + set(CCOMMON_OPT "${CCOMMON_OPT} -mp") + set(CEXTRALIB "${CEXTRALIB} -lstdc++") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") + set(CCOMMON_OPT "${CCOMMON_OPT} -mp") + endif () +endif () + + +if (DYNAMIC_ARCH) + if (${ARCH} STREQUAL "x86") + set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO") + endif () + + if (${ARCH} STREQUAL "x86_64") + set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO") + if (NOT NO_AVX) + set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER") + endif () + if (NOT NO_AVX2) + set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL") + endif () + endif () + + if (NOT DYNAMIC_CORE) + unset(DYNAMIC_ARCH) + endif () 
+endif () + +if (${ARCH} STREQUAL "ia64") + set(NO_BINARY_MODE 1) + set(BINARY_DEFINED 1) + + if (${Fortran_COMPILER_NAME} MATCHES "gfortran.*") + if (${CMAKE_C_COMPILER} STREQUAL "GNU") + # EXPRECISION = 1 + # CCOMMON_OPT += -DEXPRECISION + endif + endif +endif + +if (${ARCH} STREQUAL "mips64") + set(NO_BINARY_MODE 1) +endif + +if (${ARCH} STREQUAL "alpha") + set(NO_BINARY_MODE 1) + set(BINARY_DEFINED 1) +endif () + +if (${ARCH} STREQUAL "arm") + set(NO_BINARY_MODE 1) + set(BINARY_DEFINED 1) +endif () + +if (${ARCH} STREQUAL "arm64") + set(NO_BINARY_MODE 1) + set(BINARY_DEFINED 1) +endif () + diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake index d8facfedc..2fbfd5745 100644 --- a/cmake/c_check.cmake +++ b/cmake/c_check.cmake @@ -1,8 +1,5 @@ ## ## Author: Hank Anderson -## Copyright: (c) Stat-Ease, Inc. -## Created: 12/29/14 -## Last Modified: 12/29/14 ## Description: Ported from the OpenBLAS/c_check perl script. ## This is triggered by prebuild.cmake and runs before any of the code is built. ## Creates config.h and Makefile.conf. diff --git a/cmake/os.cmake b/cmake/os.cmake index d897a2506..fc2c40268 100644 --- a/cmake/os.cmake +++ b/cmake/os.cmake @@ -1,7 +1,5 @@ ## ## Author: Hank Anderson -## Created: 12/29/14 -## Last Modified: 12/29/14 ## Description: Ported from portion of OpenBLAS/Makefile.system ## Detects the OS and sets appropriate variables. 
@@ -58,7 +56,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") # Ensure the correct stack alignment on Win32 # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97 - if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86") + if (${ARCH} STREQUAL "x86") set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2") set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2") endif () @@ -88,7 +86,7 @@ if (QUAD_PRECISION) set(NO_EXPRECISION 1) endif () -if (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86") +if (${ARCH} STREQUAL "x86") set(NO_EXPRECISION 1) endif () diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 9595dab0d..8e05647a3 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -1,8 +1,5 @@ ## ## Author: Hank Anderson -## Copyright: (c) Stat-Ease, Inc. -## Created: 12/29/14 -## Last Modified: 12/29/14 ## Description: Ported from OpenBLAS/Makefile.prebuild ## This is triggered by system.cmake and runs before any of the code is built. ## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files). diff --git a/cmake/system.cmake b/cmake/system.cmake index 1d9c4612d..3fa75d65d 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -1,8 +1,5 @@ ## ## Author: Hank Anderson -## Copyright: (c) Stat-Ease, Inc. -## Created: 12/29/14 -## Last Modified: 12/29/14 ## Description: Ported from OpenBLAS/Makefile.system ## @@ -134,3 +131,8 @@ set(OBJCONV "$(CROSS_SUFFIX)objconv") # include("${CMAKE_SOURCE_DIR}/cmake/os.cmake") +# +# Architecture dependent settings +# +include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake") + From af11aff3093807bc1c0e5311cc525873e829a2bf Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 3 Feb 2015 12:00:49 -0600 Subject: [PATCH 031/137] Ported C compiler settings from Makefile.system into new cmake file. 
--- cmake/cc.cmake | 66 ++++++++++++++++++++++++++++++++++++++++++++++ cmake/system.cmake | 12 ++++----- 2 files changed, 71 insertions(+), 7 deletions(-) create mode 100644 cmake/cc.cmake diff --git a/cmake/cc.cmake b/cmake/cc.cmake new file mode 100644 index 000000000..b6ce0e281 --- /dev/null +++ b/cmake/cc.cmake @@ -0,0 +1,66 @@ +## +## Author: Hank Anderson +## Description: Ported from portion of OpenBLAS/Makefile.system +## Sets C related variables. + +if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_C_COMPILER} STREQUAL "Clang") + + set(CCOMMON_OPT "${CCOMMON_OPT} -Wall") + COMMON_PROF += -fno-inline + NO_UNINITIALIZED_WARN = -Wno-uninitialized + + if (QUIET_MAKE) + set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused") + endif () + + if (NO_BINARY_MODE) + + if (${ARCH} STREQUAL "mips64") + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=n32") + endif () + set(BINARY_DEFINED 1) + endif () + + if (${CORE} STREQUAL "LOONGSON3A") + set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64") + set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64") + endif () + + if (${CORE} STREQUAL "LOONGSON3B") + set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64") + set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64") + endif () + + if (${OSNAME} STREQUAL "AIX") + set(BINARY_DEFINED 1) + endif () + endif () + + if (NOT BINARY_DEFINED) + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -m64") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -m32") + endif () + endif () +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "PGI") + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7") + endif () +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -m64") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -m32") + endif () +endif () + diff --git a/cmake/system.cmake b/cmake/system.cmake index 
3fa75d65d..d46538df4 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -125,14 +125,12 @@ set(DLLWRAP "$(CROSS_SUFFIX)dllwrap") set(OBJCOPY "$(CROSS_SUFFIX)objcopy") set(OBJCONV "$(CROSS_SUFFIX)objconv") - -# -# OS dependent settings -# +# OS dependent settings include("${CMAKE_SOURCE_DIR}/cmake/os.cmake") -# -# Architecture dependent settings -# +# Architecture dependent settings include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake") +# C Compiler dependent settings +include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake") + From 2d5b442f5bdb4a2cacededabe5142b8ff1a2cadd Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 3 Feb 2015 12:32:23 -0600 Subject: [PATCH 032/137] Ported Fortran configuration code from Makefile.system to fc.cmake. --- cmake/system.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cmake/system.cmake b/cmake/system.cmake index d46538df4..c81afb9a4 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -134,3 +134,6 @@ include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake") # C Compiler dependent settings include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake") +# Fortran Compiler dependent settings +include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake") + From e4bfbd8258948507512cda5f4181490641570da8 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 3 Feb 2015 13:08:59 -0600 Subject: [PATCH 033/137] Added fc.cmake (forgot it in last commit). Moved a couple C compiler ifs from Makefile.system into cc.cmake. 
--- cmake/cc.cmake | 37 ++++++++++ cmake/fc.cmake | 193 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+) create mode 100644 cmake/fc.cmake diff --git a/cmake/cc.cmake b/cmake/cc.cmake index b6ce0e281..0cae8f9cf 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -64,3 +64,40 @@ if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") endif () endif () +if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") + + if (${ARCH} STREQUAL "mips64") + + if (NOT BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -n32") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -n64") + endif () + + if (${CORE} STREQUAL "LOONGSON3A") + set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static") + endif () + + if (${CORE} STREQUAL "LOONGSON3B") + set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static") + endif () + + else () + + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -m32") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -m64") + endif () + endif +endif + +if (${CMAKE_C_COMPILER} STREQUAL "SUN") + set(CCOMMON_OPT "${CCOMMON_OPT} -w") + if (${ARCH} STREQUAL "x86") + set(CCOMMON_OPT "${CCOMMON_OPT} -m32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + endif () +endif () + diff --git a/cmake/fc.cmake b/cmake/fc.cmake new file mode 100644 index 000000000..727098d34 --- /dev/null +++ b/cmake/fc.cmake @@ -0,0 +1,193 @@ +## +## Author: Hank Anderson +## Description: Ported from portion of OpenBLAS/Makefile.system +## Sets Fortran related variables. 
+ +if (${Fortran_COMPILER_NAME} STREQUAL "G77") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77") + set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") + if (NOT NO_BINARY_MODE) + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + endif () + endif () +endif () + +if (${Fortran_COMPILER_NAME} STREQUAL "G95") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95") + set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") + if (NOT NO_BINARY_MODE) + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + endif () + endif () +endif () + +if (${Fortran_COMPILER_NAME} STREQUAL "GFORTRAN") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT") + set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") + #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc + if (NOT NO_LAPACK) + set(EXTRALIB "{EXTRALIB} -lgfortran") + endif () + if (NO_BINARY_MODE) + if (${ARCH} STREQUAL "mips64") + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32") + endif () + endif () + else () + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8") + endif () + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + endif () + endif () + + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp") + endif () +endif () + +if (${Fortran_COMPILER_NAME} STREQUAL "INTEL") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL") + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -i8") + endif () + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") + endif () +endif () + +if (${Fortran_COMPILER_NAME} STREQUAL "FUJITSU") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU") + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") + endif () +endif () + +if (${Fortran_COMPILER_NAME} STREQUAL "IBM") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM") + # FCOMMON_OPT += -qarch=440 + if 
(BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -q64") + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -qintsize=8") + endif () + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -q32") + endif () + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") + endif () +endif () + +if (${Fortran_COMPILER_NAME} STREQUAL "PGI") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI") + set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER") + if (BINARY64) + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -i8") + endif () + set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7-64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7") + endif () + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -mp") + endif () +endif () + +if (${Fortran_COMPILER_NAME} STREQUAL "PATHSCALE") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE") + if (BINARY64) + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -i8") + endif () + endif () + + if (NOT ${ARCH} STREQUAL "mips64") + if (NOT BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + endif () + else () + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32") + endif () + endif () + + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -mp") + endif () +endif () + +if (${Fortran_COMPILER_NAME} STREQUAL "OPEN64") + + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64") + if (BINARY64) + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -i8") + endif () + endif () + + if (${ARCH} STREQUAL "mips64") + + if (NOT BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -n32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -n64") + endif () + + if (${CORE} STREQUAL "LOONGSON3A") + set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static") + endif () + + if (${CORE} STREQUAL "LOONGSON3B") + set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static") + endif () + else () + if (NOT BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} 
-m64") + endif () + endif () + + if (USE_OPENMP) + set(FEXTRALIB "${FEXTRALIB} -lstdc++") + set(FCOMMON_OPT "${FCOMMON_OPT} -mp") + endif () +endif () + +if (${Fortran_COMPILER_NAME} "SUN") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN") + if (${ARCH} STREQUAL "x86") + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + endif () + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -xopenmp=parallel") + endif () +endif () + +if (${Fortran_COMPILER_NAME} STREQUAL "COMPAQ") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ") + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") + endif () +endif () + From e818ace11af47ddacc5d5b9856b4d9db54ca9f98 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 3 Feb 2015 13:34:41 -0600 Subject: [PATCH 034/137] Ported more of Makefile.system to CMake. --- cmake/system.cmake | 204 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 197 insertions(+), 7 deletions(-) diff --git a/cmake/system.cmake b/cmake/system.cmake index c81afb9a4..ad4a6f3be 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -117,13 +117,13 @@ endif () unset(ARFLAGS) set(CPP "${COMPILER} -E") set(AR "${CROSS_SUFFIX}ar") -set(AS "$(CROSS_SUFFIX)as") -set(LD "$(CROSS_SUFFIX)ld") -set(RANLIB "$(CROSS_SUFFIX)ranlib") -set(NM "$(CROSS_SUFFIX)nm") -set(DLLWRAP "$(CROSS_SUFFIX)dllwrap") -set(OBJCOPY "$(CROSS_SUFFIX)objcopy") -set(OBJCONV "$(CROSS_SUFFIX)objconv") +set(AS "${CROSS_SUFFIX}as") +set(LD "${CROSS_SUFFIX}ld") +set(RANLIB "${CROSS_SUFFIX}ranlib") +set(NM "${CROSS_SUFFIX}nm") +set(DLLWRAP "${CROSS_SUFFIX}dllwrap") +set(OBJCOPY "${CROSS_SUFFIX}objcopy") +set(OBJCONV "${CROSS_SUFFIX}objconv") # OS dependent settings include("${CMAKE_SOURCE_DIR}/cmake/os.cmake") @@ -137,3 +137,193 @@ include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake") # Fortran Compiler dependent settings include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake") +if (BINARY64) + if (INTERFACE64) + # CCOMMON_OPT += -DUSE64BITINT + endif 
() +endif () + +if (NEED_PIC) + if (${CMAKE_C_COMPILER} STREQUAL "IBM") + set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC") + endif () + + if (${Fortran_COMPILER_NAME} STREQUAL "SUN") + set(FCOMMON_OPT "${FCOMMON_OPT} -pic") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC") + endif () +endif () + +if (DYNAMIC_ARCH) + set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH") +endif () + +if (NO_LAPACK) + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACK") + #Disable LAPACK C interface + set(NO_LAPACKE 1) +endif () + +if (NO_LAPACKE) + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACKE") +endif () + +if (NO_AVX) + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX") +endif () + +if (${ARCH} STREQUAL "x86") + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX") +endif () + +if (NO_AVX2) + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2") +endif () + +if (SMP) + set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER") + + if (${ARCH} STERQUAL "mips64") + if (NOT ${CORE} STREQUAL "LOONGSON3B") + set(USE_SIMPLE_THREADED_LEVEL3 1) + endif () + endif () + + if (USE_OPENMP) + # USE_SIMPLE_THREADED_LEVEL3 = 1 + # NO_AFFINITY = 1 + set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_OPENMP") + endif () + + if (BIGNUMA) + set(CCOMMON_OPT "${CCOMMON_OPT} -DBIGNUMA") + endif () + +endif () + +if (NO_WARMUP) + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_WARMUP") +endif () + +if (CONSISTENT_FPCSR) + set(CCOMMON_OPT "${CCOMMON_OPT} -DCONSISTENT_FPCSR") +endif () + +# Only for development +# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST") +# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST") +# set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_SWITCHING") +# set(USE_PAPI 1) + +if (USE_PAPI) + set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_PAPI") + set(EXTRALIB "${EXTRALIB} -lpapi -lperfctr") +endif () + +if (DYNAMIC_THREADS) + set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_THREADS") +endif () + +set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}") + +if (USE_SIMPLE_THREADED_LEVEL3) + set(CCOMMON_OPT "${CCOMMON_OPT} 
-DUSE_SIMPLE_THREADED_LEVEL3") +endif () + +if (DEFINED LIBNAMESUFFIX) + set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}") +else () + set(LIBPREFIX "libopenblas") +endif () + +if (NOT DEFINED SYMBOLPREFIX) + set(SYMBOLPREFIX "") +endif () + +if (NOT DEFINED SYMBOLSUFFIX) + set(SYMBOLSUFFIX "") +endif () + +set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}") + +# TODO: nead to convert these Makefiles +# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake + +# TODO: Need to figure out how to get $(*F) in cmake +set(CCOMMON_OPT "${CCOMMON_OPT} -DASMNAME=${FU}$(*F) -DASMFNAME=${FU}$(*F)${BU} -DNAME=$(*F)${BU} -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)${BU}\" -DCHAR_CNAME=\"$(*F)\"") + +if (${CORE} STREQUAL "PPC440") + set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC") +endif () + +if (${CORE} STREQUAL "PPC440FP2") + set(STATIC_ALLOCATION 1) +endif () + +if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + set(NO_AFFINITY 1) +endif () + +if (NOT ${ARCH} STREQUAL "x86_64" AND NOT ${ARCH} STREQUAL "x86" AND NOT ${CORE} STREQUAL "LOONGSON3B") + set(NO_AFFINITY 1) +endif () + +if (NO_AFFINITY) + set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AFFINITY") +endif () + +if (FUNCTION_PROFILE) + set(CCOMMON_OPT "${CCOMMON_OPT} -DFUNCTION_PROFILE") +endif () + +if (HUGETLB_ALLOCATION) + set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLB") +endif () + +if (DEFINED HUGETLBFILE_ALLOCATION) + set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=${HUGETLBFILE_ALLOCATION})") +endif () + +if (STATIC_ALLOCATION) + set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_STATIC") +endif () + +if (DEVICEDRIVER_ALLOCATION) + set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_DEVICEDRIVER -DDEVICEDRIVER_NAME=\"/dev/mapper\"") +endif () + +if (MIXED_MEMORY_ALLOCATION) + set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION") +endif () + +if (${OSNAME} STREQUAL "SunOS") + set(TAR gtar) + set(PATCH gpatch) + set(GREP ggrep) +else () + set(TAR tar) + set(PATCH patch) + set(GREP grep) +endif () + +if (NOT DEFINED MD5SUM) + set(MD5SUM 
md5sum) +endif () + +set(AWK awk) + +set(REVISION "-r${VERSION}") +string(REGEX MATCH "[0-9]+\\." MAJOR_VERSION "${VERSION}") + +if (DEBUG) + set(COMMON_OPT "${COMMON_OPT} -g") +endif () + +if (NOT DEFINED COMMON_OPT) + set(COMMON_OPT "-O2") +endif () + + From be1ce38f24f7c392589a93a053980719ad5be58f Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 3 Feb 2015 14:00:29 -0600 Subject: [PATCH 035/137] Fixed some missing parentheses. --- cmake/arch.cmake | 8 ++++---- cmake/os.cmake | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 9b459ae90..b32c8b654 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -90,13 +90,13 @@ if (${ARCH} STREQUAL "ia64") if (${CMAKE_C_COMPILER} STREQUAL "GNU") # EXPRECISION = 1 # CCOMMON_OPT += -DEXPRECISION - endif - endif -endif + endif () + endif () +endif () if (${ARCH} STREQUAL "mips64") set(NO_BINARY_MODE 1) -endif +endif () if (${ARCH} STREQUAL "alpha") set(NO_BINARY_MODE 1) diff --git a/cmake/os.cmake b/cmake/os.cmake index fc2c40268..c333cdbe0 100644 --- a/cmake/os.cmake +++ b/cmake/os.cmake @@ -73,7 +73,7 @@ endif () if (CYGWIN) set(NEED_PIC 0) set(NO_EXPRECISION 1) -endif +endif () if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix") if (SMP) @@ -98,5 +98,5 @@ endif () if (SANITY_CHECK) # TODO: need some way to get $(*F) (target filename) set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK -DREFNAME=$(*F)f${BU}") -endif +endif () From 30be55150258a38e0ca0db133d1d4754e58f404e Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 3 Feb 2015 14:21:22 -0600 Subject: [PATCH 036/137] Corrected fortran compiler name variables. Fixed some typos. Updated c_check to set ARCH and BINARY64/32. Added version variables. 
--- CMakeLists.txt | 4 ++++ cmake/arch.cmake | 4 ++-- cmake/c_check.cmake | 40 ++++++++++++++++++++++++++++++++++------ cmake/cc.cmake | 8 ++++---- cmake/f_check.cmake | 15 ++++++++++++++- cmake/fc.cmake | 22 +++++++++++----------- cmake/prebuild.cmake | 15 +++------------ cmake/system.cmake | 10 +++++----- 8 files changed, 77 insertions(+), 41 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 66292940d..0869e6fad 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,6 +4,10 @@ cmake_minimum_required(VERSION 2.8.4) project(OpenBLAS) +set(OpenBLAS_MAJOR_VERSION 0) +set(OpenBLAS_MINOR_VERSION 2) +set(OpenBLAS_PATCH_VERSION 13) +set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") # is this necessary? lapack-netlib has its own fortran checks in its CMakeLists.txt #enable_language(Fortran) diff --git a/cmake/arch.cmake b/cmake/arch.cmake index b32c8b654..5848c2409 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -12,7 +12,7 @@ if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64") endif () if (NOT NO_EXPRECISION) - if (${Fortran_COMPILER_NAME} MATCHES "gfortran.*") + if (${CMAKE_Fortran_COMPILER} MATCHES "gfortran.*") # N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") set(EXPRECISION 1) @@ -86,7 +86,7 @@ if (${ARCH} STREQUAL "ia64") set(NO_BINARY_MODE 1) set(BINARY_DEFINED 1) - if (${Fortran_COMPILER_NAME} MATCHES "gfortran.*") + if (${CMAKE_Fortran_COMPILER} MATCHES "gfortran.*") if (${CMAKE_C_COMPILER} STREQUAL "GNU") # EXPRECISION = 1 # CCOMMON_OPT += -DEXPRECISION diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake index 2fbfd5745..961bb00c4 100644 --- a/cmake/c_check.cmake +++ b/cmake/c_check.cmake @@ -4,6 +4,26 @@ ## This is triggered by prebuild.cmake and runs before any of the code is built. ## Creates config.h and Makefile.conf. 
+# CMake vars set by this file: +# OSNAME (use CMAKE_SYSTEM_NAME) +# ARCH +# C_COMPILER (use CMAKE_C_COMPILER) +# BINARY32 +# BINARY64 +# FU +# CROSS_SUFFIX +# CROSS +# CEXTRALIB + +# Defines set by this file: +# OS_ +# ARCH_ +# C_ +# __32BIT__ +# __64BIT__ +# FUNDERSCORE +# PTHREAD_CREATE_FUNC + # N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables. # TODO: detect NEED_FU @@ -23,17 +43,25 @@ if (NOT DEFINED BINARY) endif () endif () +if (BINARY EQUAL 64) + set(BINARY64 1) +else () + set(BINARY32 1) +endif () + # CMake docs define these: # CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for. # CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on. -set(HOST_ARCH ${CMAKE_SYSTEM_PROCESSOR}) -if (${HOST_ARCH} STREQUAL "AMD64") - set(HOST_ARCH "X86_64") +# +# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check +set(ARCH ${CMAKE_SYSTEM_PROCESSOR}) +if (${ARCH} STREQUAL "AMD64") + set(ARCH "X86_64") endif () # If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong -if (${HOST_ARCH} STREQUAL "X86_64" AND BINARY EQUAL 32) - set(HOST_ARCH X86) +if (${ARCH} STREQUAL "X86_64" AND BINARY EQUAL 32) + set(ARCH X86) endif () set(COMPILER_ID ${CMAKE_CXX_COMPILER_ID}) @@ -43,7 +71,7 @@ endif () file(WRITE ${TARGET_CONF} "#define OS_${HOST_OS}\t1\n" - "#define ARCH_${HOST_ARCH}\t1\n" + "#define ARCH_${ARCH}\t1\n" "#define C_${COMPILER_ID}\t1\n" "#define __${BINARY}BIT__\t1\n" "#define FUNDERSCORE\t${NEED_FU}\n") diff --git a/cmake/cc.cmake b/cmake/cc.cmake index 0cae8f9cf..28daa72dc 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -6,8 +6,8 @@ if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_C_COMPILER} STREQUAL "Clang") set(CCOMMON_OPT "${CCOMMON_OPT} -Wall") - COMMON_PROF += -fno-inline - NO_UNINITIALIZED_WARN = -Wno-uninitialized + set(COMMON_PROF "${COMMON_PROF} -fno-inline") + 
set(NO_UNINITIALIZED_WARN "-Wno-uninitialized") if (QUIET_MAKE) set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused") @@ -89,8 +89,8 @@ if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") else () set(CCOMMON_OPT "${CCOMMON_OPT} -m64") endif () - endif -endif + endif () +endif () if (${CMAKE_C_COMPILER} STREQUAL "SUN") set(CCOMMON_OPT "${CCOMMON_OPT} -w") diff --git a/cmake/f_check.cmake b/cmake/f_check.cmake index a291430aa..6c1dbdf18 100644 --- a/cmake/f_check.cmake +++ b/cmake/f_check.cmake @@ -7,8 +7,20 @@ ## This is triggered by prebuild.cmake and runs before any of the code is built. ## Appends Fortran information to config.h and Makefile.conf. +# CMake vars set by this file: +# F_COMPILER +# FC +# BU +# NOFORTRAN +# NEED2UNDERSCORES +# FEXTRALIB -if (NOT ${ONLY_CBLAS}) +# Defines set by this file: +# BUNDERSCORE +# NEEDBUNDERSCORE +# NEED2UNDERSCORES + +if (NOT ONLY_CBLAS) # N.B. f_check is not cross-platform, so instead try to use CMake variables # run f_check (appends to TARGET files) # message(STATUS "Running f_check...") @@ -30,6 +42,7 @@ else () #When we only build CBLAS, we set NOFORTRAN=2 set(NOFORTRAN 2) set(NO_FBLAS 1) + #set(F_COMPILER GFORTRAN) # CMake handles the fortran compiler set(BU "_") file(APPEND ${TARGET_CONF} "#define BUNDERSCORE _\n" diff --git a/cmake/fc.cmake b/cmake/fc.cmake index 727098d34..61f0e0187 100644 --- a/cmake/fc.cmake +++ b/cmake/fc.cmake @@ -3,7 +3,7 @@ ## Description: Ported from portion of OpenBLAS/Makefile.system ## Sets Fortran related variables. 
-if (${Fortran_COMPILER_NAME} STREQUAL "G77") +if (${CMAKE_Fortran_COMPILER} STREQUAL "G77") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77") set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") if (NOT NO_BINARY_MODE) @@ -15,7 +15,7 @@ if (${Fortran_COMPILER_NAME} STREQUAL "G77") endif () endif () -if (${Fortran_COMPILER_NAME} STREQUAL "G95") +if (${CMAKE_Fortran_COMPILER} STREQUAL "G95") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95") set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") if (NOT NO_BINARY_MODE) @@ -27,7 +27,7 @@ if (${Fortran_COMPILER_NAME} STREQUAL "G95") endif () endif () -if (${Fortran_COMPILER_NAME} STREQUAL "GFORTRAN") +if (${CMAKE_Fortran_COMPILER} STREQUAL "GFORTRAN") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT") set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc @@ -58,7 +58,7 @@ if (${Fortran_COMPILER_NAME} STREQUAL "GFORTRAN") endif () endif () -if (${Fortran_COMPILER_NAME} STREQUAL "INTEL") +if (${CMAKE_Fortran_COMPILER} STREQUAL "INTEL") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL") if (INTERFACE64) set(FCOMMON_OPT "${FCOMMON_OPT} -i8") @@ -68,14 +68,14 @@ if (${Fortran_COMPILER_NAME} STREQUAL "INTEL") endif () endif () -if (${Fortran_COMPILER_NAME} STREQUAL "FUJITSU") +if (${CMAKE_Fortran_COMPILER} STREQUAL "FUJITSU") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU") if (USE_OPENMP) set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") endif () endif () -if (${Fortran_COMPILER_NAME} STREQUAL "IBM") +if (${CMAKE_Fortran_COMPILER} STREQUAL "IBM") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM") # FCOMMON_OPT += -qarch=440 if (BINARY64) @@ -91,7 +91,7 @@ if (${Fortran_COMPILER_NAME} STREQUAL "IBM") endif () endif () -if (${Fortran_COMPILER_NAME} STREQUAL "PGI") +if (${CMAKE_Fortran_COMPILER} STREQUAL "PGI") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI") set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER") if (BINARY64) @@ -107,7 +107,7 @@ if (${Fortran_COMPILER_NAME} STREQUAL "PGI") endif () endif () 
-if (${Fortran_COMPILER_NAME} STREQUAL "PATHSCALE") +if (${CMAKE_Fortran_COMPILER} STREQUAL "PATHSCALE") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE") if (BINARY64) if (INTERFACE64) @@ -134,7 +134,7 @@ if (${Fortran_COMPILER_NAME} STREQUAL "PATHSCALE") endif () endif () -if (${Fortran_COMPILER_NAME} STREQUAL "OPEN64") +if (${CMAKE_Fortran_COMPILER} STREQUAL "OPEN64") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64") if (BINARY64) @@ -172,7 +172,7 @@ if (${Fortran_COMPILER_NAME} STREQUAL "OPEN64") endif () endif () -if (${Fortran_COMPILER_NAME} "SUN") +if (${CMAKE_Fortran_COMPILER} STREQUAL "SUN") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN") if (${ARCH} STREQUAL "x86") set(FCOMMON_OPT "${FCOMMON_OPT} -m32") @@ -184,7 +184,7 @@ if (${Fortran_COMPILER_NAME} "SUN") endif () endif () -if (${Fortran_COMPILER_NAME} STREQUAL "COMPAQ") +if (${CMAKE_Fortran_COMPILER} STREQUAL "COMPAQ") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ") if (USE_OPENMP) set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 8e05647a3..6312a515e 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -6,19 +6,10 @@ ## Next it runs f_check and appends some fortran information to the files. ## Finally it runs getarch and getarch_2nd for even more environment information. 
-# List of vars set by this file and included files: -# OSNAME -# ARCH -# C_COMPILER -# BINARY32 -# BINARY64 -# CEXTRALIB -# F_COMPILER -# FC -# BU -# CORE <- REQUIRED +# CMake vars set by this file: +# CORE # LIBCORE -# NUM_CORES <- REQUIRED +# NUM_CORES # HAVE_MMX # HAVE_SSE # HAVE_SSE2 diff --git a/cmake/system.cmake b/cmake/system.cmake index ad4a6f3be..f85d13f03 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -150,7 +150,7 @@ if (NEED_PIC) set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC") endif () - if (${Fortran_COMPILER_NAME} STREQUAL "SUN") + if (${CMAKE_Fortran_COMPILER} STREQUAL "SUN") set(FCOMMON_OPT "${FCOMMON_OPT} -pic") else () set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC") @@ -186,7 +186,7 @@ endif () if (SMP) set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER") - if (${ARCH} STERQUAL "mips64") + if (${ARCH} STREQUAL "mips64") if (NOT ${CORE} STREQUAL "LOONGSON3B") set(USE_SIMPLE_THREADED_LEVEL3 1) endif () @@ -299,7 +299,7 @@ if (MIXED_MEMORY_ALLOCATION) set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION") endif () -if (${OSNAME} STREQUAL "SunOS") +if (${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") set(TAR gtar) set(PATCH gpatch) set(GREP ggrep) @@ -315,8 +315,8 @@ endif () set(AWK awk) -set(REVISION "-r${VERSION}") -string(REGEX MATCH "[0-9]+\\." MAJOR_VERSION "${VERSION}") +set(REVISION "-r${OpenBLAS_VERSION}") +set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION}) if (DEBUG) set(COMMON_OPT "${COMMON_OPT} -g") From 0ccfa60a537b5057d05abcc2b9e57600f0528515 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 3 Feb 2015 15:09:37 -0600 Subject: [PATCH 037/137] Changed fortran compiler name to be uppercase and stripped of path/ext. 
--- cmake/arch.cmake | 4 ++-- cmake/f_check.cmake | 4 ++++ cmake/fc.cmake | 22 +++++++++++----------- cmake/system.cmake | 6 ++---- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 5848c2409..34beb71b3 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -12,7 +12,7 @@ if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64") endif () if (NOT NO_EXPRECISION) - if (${CMAKE_Fortran_COMPILER} MATCHES "gfortran.*") + if (${F_COMPILER} MATCHES "GFORTRAN") # N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") set(EXPRECISION 1) @@ -86,7 +86,7 @@ if (${ARCH} STREQUAL "ia64") set(NO_BINARY_MODE 1) set(BINARY_DEFINED 1) - if (${CMAKE_Fortran_COMPILER} MATCHES "gfortran.*") + if (${F_COMPILER} MATCHES "GFORTRAN") if (${CMAKE_C_COMPILER} STREQUAL "GNU") # EXPRECISION = 1 # CCOMMON_OPT += -DEXPRECISION diff --git a/cmake/f_check.cmake b/cmake/f_check.cmake index 6c1dbdf18..53552083b 100644 --- a/cmake/f_check.cmake +++ b/cmake/f_check.cmake @@ -48,3 +48,7 @@ else () "#define BUNDERSCORE _\n" "#define NEEDBUNDERSCORE 1\n") endif() + +get_filename_component(F_COMPILER ${CMAKE_Fortran_COMPILER} NAME_WE) +string(TOUPPER ${F_COMPILER} F_COMPILER) + diff --git a/cmake/fc.cmake b/cmake/fc.cmake index 61f0e0187..d7215866c 100644 --- a/cmake/fc.cmake +++ b/cmake/fc.cmake @@ -3,7 +3,7 @@ ## Description: Ported from portion of OpenBLAS/Makefile.system ## Sets Fortran related variables. 
-if (${CMAKE_Fortran_COMPILER} STREQUAL "G77") +if (${F_COMPILER} STREQUAL "G77") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77") set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") if (NOT NO_BINARY_MODE) @@ -15,7 +15,7 @@ if (${CMAKE_Fortran_COMPILER} STREQUAL "G77") endif () endif () -if (${CMAKE_Fortran_COMPILER} STREQUAL "G95") +if (${F_COMPILER} STREQUAL "G95") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95") set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") if (NOT NO_BINARY_MODE) @@ -27,7 +27,7 @@ if (${CMAKE_Fortran_COMPILER} STREQUAL "G95") endif () endif () -if (${CMAKE_Fortran_COMPILER} STREQUAL "GFORTRAN") +if (${F_COMPILER} STREQUAL "GFORTRAN") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT") set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc @@ -58,7 +58,7 @@ if (${CMAKE_Fortran_COMPILER} STREQUAL "GFORTRAN") endif () endif () -if (${CMAKE_Fortran_COMPILER} STREQUAL "INTEL") +if (${F_COMPILER} STREQUAL "INTEL") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL") if (INTERFACE64) set(FCOMMON_OPT "${FCOMMON_OPT} -i8") @@ -68,14 +68,14 @@ if (${CMAKE_Fortran_COMPILER} STREQUAL "INTEL") endif () endif () -if (${CMAKE_Fortran_COMPILER} STREQUAL "FUJITSU") +if (${F_COMPILER} STREQUAL "FUJITSU") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU") if (USE_OPENMP) set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") endif () endif () -if (${CMAKE_Fortran_COMPILER} STREQUAL "IBM") +if (${F_COMPILER} STREQUAL "IBM") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM") # FCOMMON_OPT += -qarch=440 if (BINARY64) @@ -91,7 +91,7 @@ if (${CMAKE_Fortran_COMPILER} STREQUAL "IBM") endif () endif () -if (${CMAKE_Fortran_COMPILER} STREQUAL "PGI") +if (${F_COMPILER} STREQUAL "PGI") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI") set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER") if (BINARY64) @@ -107,7 +107,7 @@ if (${CMAKE_Fortran_COMPILER} STREQUAL "PGI") endif () endif () -if (${CMAKE_Fortran_COMPILER} STREQUAL "PATHSCALE") +if (${F_COMPILER} 
STREQUAL "PATHSCALE") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE") if (BINARY64) if (INTERFACE64) @@ -134,7 +134,7 @@ if (${CMAKE_Fortran_COMPILER} STREQUAL "PATHSCALE") endif () endif () -if (${CMAKE_Fortran_COMPILER} STREQUAL "OPEN64") +if (${F_COMPILER} STREQUAL "OPEN64") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64") if (BINARY64) @@ -172,7 +172,7 @@ if (${CMAKE_Fortran_COMPILER} STREQUAL "OPEN64") endif () endif () -if (${CMAKE_Fortran_COMPILER} STREQUAL "SUN") +if (${F_COMPILER} STREQUAL "SUN") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN") if (${ARCH} STREQUAL "x86") set(FCOMMON_OPT "${FCOMMON_OPT} -m32") @@ -184,7 +184,7 @@ if (${CMAKE_Fortran_COMPILER} STREQUAL "SUN") endif () endif () -if (${CMAKE_Fortran_COMPILER} STREQUAL "COMPAQ") +if (${F_COMPILER} STREQUAL "COMPAQ") set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ") if (USE_OPENMP) set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") diff --git a/cmake/system.cmake b/cmake/system.cmake index f85d13f03..1f602e021 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -72,7 +72,7 @@ if (CMAKE_CXX_COMPILER STREQUAL loongcc) endif () #if don't use Fortran, it will only compile CBLAS. -if (${ONLY_CBLAS}) +if (ONLY_CBLAS) set(NO_LAPACK 1) else () set(ONLY_CBLAS 0) @@ -81,12 +81,10 @@ endif () include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake") if (NOT DEFINED NUM_THREADS) - # TODO: NUM_CORES comes from `getarch.c` or `cpuid_x86.c`. This is built and executed above in `Makefile.prebuild`, and the results are in `Makefile.conf` and `Makefile_kernel.conf`. -hpa set(NUM_THREADS ${NUM_CORES}) endif () if (${NUM_THREADS} EQUAL 1) - # TODO: was "override USE_THREAD = 0", do we need "override" here? 
-hpa set(USE_THREAD 0) endif () @@ -150,7 +148,7 @@ if (NEED_PIC) set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC") endif () - if (${CMAKE_Fortran_COMPILER} STREQUAL "SUN") + if (${F_COMPILER} STREQUAL "SUN") set(FCOMMON_OPT "${FCOMMON_OPT} -pic") else () set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC") From 560c96a9a7e96461bd29cf2b984dc26655d98fb7 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 3 Feb 2015 15:11:15 -0600 Subject: [PATCH 038/137] Fixed newlines in some cmake files. --- cmake/arch.cmake | 230 ++++++++++++++-------------- cmake/cc.cmake | 206 ++++++++++++------------- cmake/fc.cmake | 386 +++++++++++++++++++++++------------------------ cmake/os.cmake | 204 ++++++++++++------------- 4 files changed, 513 insertions(+), 513 deletions(-) diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 34beb71b3..d6fa3ed5d 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -1,115 +1,115 @@ -## -## Author: Hank Anderson -## Description: Ported from portion of OpenBLAS/Makefile.system -## Sets various variables based on architecture. - -if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64") - - if (${ARCH} STREQUAL "x86") - if (NOT BINARY) - set(NO_BINARY_MODE 1) - endif () - endif () - - if (NOT NO_EXPRECISION) - if (${F_COMPILER} MATCHES "GFORTRAN") - # N.B. 
I'm not sure if CMake differentiates between GCC and LSB -hpa - if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") - set(EXPRECISION 1) - set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double") - set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") - endif () - if (${CMAKE_C_COMPILER} STREQUAL "Clang") - set(EXPRECISION 1) - set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION") - set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") - endif () - endif () - endif () -endif () - -if (${CMAKE_C_COMPILER} STREQUAL "Intel") - set(CCOMMON_OPT "${CCOMMON_OPT} -wd981") -endif () - -if (USE_OPENMP) - - if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") - set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") - endif () - - if (${CMAKE_C_COMPILER} STREQUAL "Clang") - message(WARNING "Clang doesn't support OpenMP yet.") - set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") - endif () - - if (${CMAKE_C_COMPILER} STREQUAL "Intel") - set(CCOMMON_OPT "${CCOMMON_OPT} -openmp") - endif () - - if (${CMAKE_C_COMPILER} STREQUAL "PGI") - set(CCOMMON_OPT "${CCOMMON_OPT} -mp") - endif () - - if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") - set(CCOMMON_OPT "${CCOMMON_OPT} -mp") - set(CEXTRALIB "${CEXTRALIB} -lstdc++") - endif () - - if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") - set(CCOMMON_OPT "${CCOMMON_OPT} -mp") - endif () -endif () - - -if (DYNAMIC_ARCH) - if (${ARCH} STREQUAL "x86") - set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO") - endif () - - if (${ARCH} STREQUAL "x86_64") - set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO") - if (NOT NO_AVX) - set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER") - endif () - if (NOT NO_AVX2) - set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL") - endif () - endif () - - if (NOT DYNAMIC_CORE) - unset(DYNAMIC_ARCH) - endif () 
-endif () - -if (${ARCH} STREQUAL "ia64") - set(NO_BINARY_MODE 1) - set(BINARY_DEFINED 1) - - if (${F_COMPILER} MATCHES "GFORTRAN") - if (${CMAKE_C_COMPILER} STREQUAL "GNU") - # EXPRECISION = 1 - # CCOMMON_OPT += -DEXPRECISION - endif () - endif () -endif () - -if (${ARCH} STREQUAL "mips64") - set(NO_BINARY_MODE 1) -endif () - -if (${ARCH} STREQUAL "alpha") - set(NO_BINARY_MODE 1) - set(BINARY_DEFINED 1) -endif () - -if (${ARCH} STREQUAL "arm") - set(NO_BINARY_MODE 1) - set(BINARY_DEFINED 1) -endif () - -if (${ARCH} STREQUAL "arm64") - set(NO_BINARY_MODE 1) - set(BINARY_DEFINED 1) -endif () - +## +## Author: Hank Anderson +## Description: Ported from portion of OpenBLAS/Makefile.system +## Sets various variables based on architecture. + +if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64") + + if (${ARCH} STREQUAL "x86") + if (NOT BINARY) + set(NO_BINARY_MODE 1) + endif () + endif () + + if (NOT NO_EXPRECISION) + if (${F_COMPILER} MATCHES "GFORTRAN") + # N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa + if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") + set(EXPRECISION 1) + set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double") + set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") + endif () + if (${CMAKE_C_COMPILER} STREQUAL "Clang") + set(EXPRECISION 1) + set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION") + set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") + endif () + endif () + endif () +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "Intel") + set(CCOMMON_OPT "${CCOMMON_OPT} -wd981") +endif () + +if (USE_OPENMP) + + if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") + set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "Clang") + message(WARNING "Clang doesn't support OpenMP yet.") + set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "Intel") + set(CCOMMON_OPT "${CCOMMON_OPT} -openmp") + 
endif () + + if (${CMAKE_C_COMPILER} STREQUAL "PGI") + set(CCOMMON_OPT "${CCOMMON_OPT} -mp") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") + set(CCOMMON_OPT "${CCOMMON_OPT} -mp") + set(CEXTRALIB "${CEXTRALIB} -lstdc++") + endif () + + if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") + set(CCOMMON_OPT "${CCOMMON_OPT} -mp") + endif () +endif () + + +if (DYNAMIC_ARCH) + if (${ARCH} STREQUAL "x86") + set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO") + endif () + + if (${ARCH} STREQUAL "x86_64") + set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO") + if (NOT NO_AVX) + set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER") + endif () + if (NOT NO_AVX2) + set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL") + endif () + endif () + + if (NOT DYNAMIC_CORE) + unset(DYNAMIC_ARCH) + endif () +endif () + +if (${ARCH} STREQUAL "ia64") + set(NO_BINARY_MODE 1) + set(BINARY_DEFINED 1) + + if (${F_COMPILER} MATCHES "GFORTRAN") + if (${CMAKE_C_COMPILER} STREQUAL "GNU") + # EXPRECISION = 1 + # CCOMMON_OPT += -DEXPRECISION + endif () + endif () +endif () + +if (${ARCH} STREQUAL "mips64") + set(NO_BINARY_MODE 1) +endif () + +if (${ARCH} STREQUAL "alpha") + set(NO_BINARY_MODE 1) + set(BINARY_DEFINED 1) +endif () + +if (${ARCH} STREQUAL "arm") + set(NO_BINARY_MODE 1) + set(BINARY_DEFINED 1) +endif () + +if (${ARCH} STREQUAL "arm64") + set(NO_BINARY_MODE 1) + set(BINARY_DEFINED 1) +endif () + diff --git a/cmake/cc.cmake b/cmake/cc.cmake index 28daa72dc..de196524f 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -1,103 +1,103 @@ -## -## Author: Hank Anderson -## Description: Ported from portion of OpenBLAS/Makefile.system -## Sets C related variables. 
- -if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_C_COMPILER} STREQUAL "Clang") - - set(CCOMMON_OPT "${CCOMMON_OPT} -Wall") - set(COMMON_PROF "${COMMON_PROF} -fno-inline") - set(NO_UNINITIALIZED_WARN "-Wno-uninitialized") - - if (QUIET_MAKE) - set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused") - endif () - - if (NO_BINARY_MODE) - - if (${ARCH} STREQUAL "mips64") - if (BINARY64) - set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64") - else () - set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=n32") - endif () - set(BINARY_DEFINED 1) - endif () - - if (${CORE} STREQUAL "LOONGSON3A") - set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64") - set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64") - endif () - - if (${CORE} STREQUAL "LOONGSON3B") - set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64") - set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64") - endif () - - if (${OSNAME} STREQUAL "AIX") - set(BINARY_DEFINED 1) - endif () - endif () - - if (NOT BINARY_DEFINED) - if (BINARY64) - set(CCOMMON_OPT "${CCOMMON_OPT} -m64") - else () - set(CCOMMON_OPT "${CCOMMON_OPT} -m32") - endif () - endif () -endif () - -if (${CMAKE_C_COMPILER} STREQUAL "PGI") - if (BINARY64) - set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64") - else () - set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7") - endif () -endif () - -if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") - if (BINARY64) - set(CCOMMON_OPT "${CCOMMON_OPT} -m64") - else () - set(CCOMMON_OPT "${CCOMMON_OPT} -m32") - endif () -endif () - -if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") - - if (${ARCH} STREQUAL "mips64") - - if (NOT BINARY64) - set(CCOMMON_OPT "${CCOMMON_OPT} -n32") - else () - set(CCOMMON_OPT "${CCOMMON_OPT} -n64") - endif () - - if (${CORE} STREQUAL "LOONGSON3A") - set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static") - endif () - - if (${CORE} STREQUAL "LOONGSON3B") - set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static") - endif () - - else () - - if (BINARY64) - set(CCOMMON_OPT "${CCOMMON_OPT} -m32") - else () - 
set(CCOMMON_OPT "${CCOMMON_OPT} -m64") - endif () - endif () -endif () - -if (${CMAKE_C_COMPILER} STREQUAL "SUN") - set(CCOMMON_OPT "${CCOMMON_OPT} -w") - if (${ARCH} STREQUAL "x86") - set(CCOMMON_OPT "${CCOMMON_OPT} -m32") - else () - set(FCOMMON_OPT "${FCOMMON_OPT} -m64") - endif () -endif () - +## +## Author: Hank Anderson +## Description: Ported from portion of OpenBLAS/Makefile.system +## Sets C related variables. + +if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_C_COMPILER} STREQUAL "Clang") + + set(CCOMMON_OPT "${CCOMMON_OPT} -Wall") + set(COMMON_PROF "${COMMON_PROF} -fno-inline") + set(NO_UNINITIALIZED_WARN "-Wno-uninitialized") + + if (QUIET_MAKE) + set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused") + endif () + + if (NO_BINARY_MODE) + + if (${ARCH} STREQUAL "mips64") + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=n32") + endif () + set(BINARY_DEFINED 1) + endif () + + if (${CORE} STREQUAL "LOONGSON3A") + set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64") + set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64") + endif () + + if (${CORE} STREQUAL "LOONGSON3B") + set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64") + set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64") + endif () + + if (${OSNAME} STREQUAL "AIX") + set(BINARY_DEFINED 1) + endif () + endif () + + if (NOT BINARY_DEFINED) + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -m64") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -m32") + endif () + endif () +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "PGI") + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7") + endif () +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -m64") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -m32") + endif () +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") + + if (${ARCH} STREQUAL "mips64") + 
+ if (NOT BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -n32") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -n64") + endif () + + if (${CORE} STREQUAL "LOONGSON3A") + set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static") + endif () + + if (${CORE} STREQUAL "LOONGSON3B") + set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static") + endif () + + else () + + if (BINARY64) + set(CCOMMON_OPT "${CCOMMON_OPT} -m32") + else () + set(CCOMMON_OPT "${CCOMMON_OPT} -m64") + endif () + endif () +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "SUN") + set(CCOMMON_OPT "${CCOMMON_OPT} -w") + if (${ARCH} STREQUAL "x86") + set(CCOMMON_OPT "${CCOMMON_OPT} -m32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + endif () +endif () + diff --git a/cmake/fc.cmake b/cmake/fc.cmake index d7215866c..a47865b63 100644 --- a/cmake/fc.cmake +++ b/cmake/fc.cmake @@ -1,193 +1,193 @@ -## -## Author: Hank Anderson -## Description: Ported from portion of OpenBLAS/Makefile.system -## Sets Fortran related variables. - -if (${F_COMPILER} STREQUAL "G77") - set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77") - set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") - if (NOT NO_BINARY_MODE) - if (BINARY64) - set(FCOMMON_OPT "${FCOMMON_OPT} -m64") - else () - set(FCOMMON_OPT "${FCOMMON_OPT} -m32") - endif () - endif () -endif () - -if (${F_COMPILER} STREQUAL "G95") - set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95") - set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") - if (NOT NO_BINARY_MODE) - if (BINARY64) - set(FCOMMON_OPT "${FCOMMON_OPT} -m64") - else () - set(FCOMMON_OPT "${FCOMMON_OPT} -m32") - endif () - endif () -endif () - -if (${F_COMPILER} STREQUAL "GFORTRAN") - set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT") - set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") - #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc - if (NOT NO_LAPACK) - set(EXTRALIB "{EXTRALIB} -lgfortran") - endif () - if (NO_BINARY_MODE) - if (${ARCH} STREQUAL "mips64") - if (BINARY64) - set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64") - else () - set(FCOMMON_OPT 
"${FCOMMON_OPT} -mabi=n32") - endif () - endif () - else () - if (BINARY64) - set(FCOMMON_OPT "${FCOMMON_OPT} -m64") - if (INTERFACE64) - set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8") - endif () - else () - set(FCOMMON_OPT "${FCOMMON_OPT} -m32") - endif () - endif () - - if (USE_OPENMP) - set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp") - endif () -endif () - -if (${F_COMPILER} STREQUAL "INTEL") - set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL") - if (INTERFACE64) - set(FCOMMON_OPT "${FCOMMON_OPT} -i8") - endif () - if (USE_OPENMP) - set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") - endif () -endif () - -if (${F_COMPILER} STREQUAL "FUJITSU") - set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU") - if (USE_OPENMP) - set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") - endif () -endif () - -if (${F_COMPILER} STREQUAL "IBM") - set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM") - # FCOMMON_OPT += -qarch=440 - if (BINARY64) - set(FCOMMON_OPT "${FCOMMON_OPT} -q64") - if (INTERFACE64) - set(FCOMMON_OPT "${FCOMMON_OPT} -qintsize=8") - endif () - else () - set(FCOMMON_OPT "${FCOMMON_OPT} -q32") - endif () - if (USE_OPENMP) - set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") - endif () -endif () - -if (${F_COMPILER} STREQUAL "PGI") - set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI") - set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER") - if (BINARY64) - if (INTERFACE64) - set(FCOMMON_OPT "${FCOMMON_OPT} -i8") - endif () - set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7-64") - else () - set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7") - endif () - if (USE_OPENMP) - set(FCOMMON_OPT "${FCOMMON_OPT} -mp") - endif () -endif () - -if (${F_COMPILER} STREQUAL "PATHSCALE") - set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE") - if (BINARY64) - if (INTERFACE64) - set(FCOMMON_OPT "${FCOMMON_OPT} -i8") - endif () - endif () - - if (NOT ${ARCH} STREQUAL "mips64") - if (NOT BINARY64) - set(FCOMMON_OPT "${FCOMMON_OPT} -m32") - else () - set(FCOMMON_OPT "${FCOMMON_OPT} -m64") - endif () - else () - if (BINARY64) - 
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64") - else () - set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32") - endif () - endif () - - if (USE_OPENMP) - set(FCOMMON_OPT "${FCOMMON_OPT} -mp") - endif () -endif () - -if (${F_COMPILER} STREQUAL "OPEN64") - - set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64") - if (BINARY64) - if (INTERFACE64) - set(FCOMMON_OPT "${FCOMMON_OPT} -i8") - endif () - endif () - - if (${ARCH} STREQUAL "mips64") - - if (NOT BINARY64) - set(FCOMMON_OPT "${FCOMMON_OPT} -n32") - else () - set(FCOMMON_OPT "${FCOMMON_OPT} -n64") - endif () - - if (${CORE} STREQUAL "LOONGSON3A") - set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static") - endif () - - if (${CORE} STREQUAL "LOONGSON3B") - set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static") - endif () - else () - if (NOT BINARY64) - set(FCOMMON_OPT "${FCOMMON_OPT} -m32") - else () - set(FCOMMON_OPT "${FCOMMON_OPT} -m64") - endif () - endif () - - if (USE_OPENMP) - set(FEXTRALIB "${FEXTRALIB} -lstdc++") - set(FCOMMON_OPT "${FCOMMON_OPT} -mp") - endif () -endif () - -if (${F_COMPILER} STREQUAL "SUN") - set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN") - if (${ARCH} STREQUAL "x86") - set(FCOMMON_OPT "${FCOMMON_OPT} -m32") - else () - set(FCOMMON_OPT "${FCOMMON_OPT} -m64") - endif () - if (USE_OPENMP) - set(FCOMMON_OPT "${FCOMMON_OPT} -xopenmp=parallel") - endif () -endif () - -if (${F_COMPILER} STREQUAL "COMPAQ") - set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ") - if (USE_OPENMP) - set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") - endif () -endif () - +## +## Author: Hank Anderson +## Description: Ported from portion of OpenBLAS/Makefile.system +## Sets Fortran related variables. 
+ +if (${F_COMPILER} STREQUAL "G77") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G77") + set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") + if (NOT NO_BINARY_MODE) + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + endif () + endif () +endif () + +if (${F_COMPILER} STREQUAL "G95") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_G95") + set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") + if (NOT NO_BINARY_MODE) + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + endif () + endif () +endif () + +if (${F_COMPILER} STREQUAL "GFORTRAN") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT") + set(FCOMMON_OPT "${FCOMMON_OPT} -Wall") + #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc + if (NOT NO_LAPACK) + set(EXTRALIB "${EXTRALIB} -lgfortran") + endif () + if (NO_BINARY_MODE) + if (${ARCH} STREQUAL "mips64") + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32") + endif () + endif () + else () + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8") + endif () + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + endif () + endif () + + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "INTEL") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL") + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -i8") + endif () + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "FUJITSU") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU") + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "IBM") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM") + # FCOMMON_OPT += -qarch=440 + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -q64") + if (INTERFACE64) + 
set(FCOMMON_OPT "${FCOMMON_OPT} -qintsize=8") + endif () + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -q32") + endif () + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "PGI") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI") + set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER") + if (BINARY64) + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -i8") + endif () + set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7-64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7") + endif () + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -mp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "PATHSCALE") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE") + if (BINARY64) + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -i8") + endif () + endif () + + if (NOT ${ARCH} STREQUAL "mips64") + if (NOT BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + endif () + else () + if (BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32") + endif () + endif () + + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -mp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "OPEN64") + + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64") + if (BINARY64) + if (INTERFACE64) + set(FCOMMON_OPT "${FCOMMON_OPT} -i8") + endif () + endif () + + if (${ARCH} STREQUAL "mips64") + + if (NOT BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -n32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -n64") + endif () + + if (${CORE} STREQUAL "LOONGSON3A") + set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static") + endif () + + if (${CORE} STREQUAL "LOONGSON3B") + set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static") + endif () + else () + if (NOT BINARY64) + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + endif () + endif () + + if (USE_OPENMP) + set(FEXTRALIB "${FEXTRALIB} -lstdc++") + set(FCOMMON_OPT 
"${FCOMMON_OPT} -mp") + endif () +endif () + +if (${F_COMPILER} STREQUAL "SUN") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN") + if (${ARCH} STREQUAL "x86") + set(FCOMMON_OPT "${FCOMMON_OPT} -m32") + else () + set(FCOMMON_OPT "${FCOMMON_OPT} -m64") + endif () + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -xopenmp=parallel") + endif () +endif () + +if (${F_COMPILER} STREQUAL "COMPAQ") + set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ") + if (USE_OPENMP) + set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") + endif () +endif () + diff --git a/cmake/os.cmake b/cmake/os.cmake index c333cdbe0..cf36ef62f 100644 --- a/cmake/os.cmake +++ b/cmake/os.cmake @@ -1,102 +1,102 @@ -## -## Author: Hank Anderson -## Description: Ported from portion of OpenBLAS/Makefile.system -## Detects the OS and sets appropriate variables. - -if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var - set(MD5SUM "md5 -r") -endif () - -if (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD") - set(MD5SUM "md5 -r") -endif () - -if (${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD") - set(MD5SUM "md5 -n") -endif () - -if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - set(EXTRALIB "${EXTRALIB} -lm") - set(NO_EXPRECISION 1) -endif () - -if (${CMAKE_SYSTEM_NAME} STREQUAL "AIX") - set(EXTRALIB "${EXTRALIB} -lm") -endif () - -# TODO: this is probably meant for mingw, not other windows compilers -if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - - set(NEED_PIC 0) - set(NO_EXPRECISION 1) - - set(EXTRALIB "${EXTRALIB} -defaultlib:advapi32") - - # probably not going to use these - set(SUFFIX "obj") - set(PSUFFIX "pobj") - set(LIBSUFFIX "a") - - if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang") - set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI") - endif () - - if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") - - # Test for supporting MS_ABI - # removed string parsing in favor of CMake's version comparison -hpa - execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion 
OUTPUT_VARIABLE GCC_VERSION) - if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7) - # GCC Version >=4.7 - # It is compatible with MSVC ABI. - set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI") - endif () - endif () - - # Ensure the correct stack alignment on Win32 - # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97 - if (${ARCH} STREQUAL "x86") - set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2") - set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2") - endif () - -endif () - -if (${CMAKE_SYSTEM_NAME} STREQUAL "Interix") - set(NEED_PIC 0) - set(NO_EXPRECISION 1) - - set(INTERIX_TOOL_DIR STREQUAL "/opt/gcc.3.3/i586-pc-interix3/bin") -endif () - -if (CYGWIN) - set(NEED_PIC 0) - set(NO_EXPRECISION 1) -endif () - -if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix") - if (SMP) - set(EXTRALIB "${EXTRALIB} -lpthread") - endif () -endif () - -if (QUAD_PRECISION) - set(CCOMMON_OPT "${CCOMMON_OPT} -DQUAD_PRECISION") - set(NO_EXPRECISION 1) -endif () - -if (${ARCH} STREQUAL "x86") - set(NO_EXPRECISION 1) -endif () - -if (UTEST_CHECK) - set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK") - set(SANITY_CHECK 1) -endif () - -if (SANITY_CHECK) - # TODO: need some way to get $(*F) (target filename) - set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK -DREFNAME=$(*F)f${BU}") -endif () - +## +## Author: Hank Anderson +## Description: Ported from portion of OpenBLAS/Makefile.system +## Detects the OS and sets appropriate variables. 
+ +if (${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") + set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var + set(MD5SUM "md5 -r") +endif () + +if (${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD") + set(MD5SUM "md5 -r") +endif () + +if (${CMAKE_SYSTEM_NAME} STREQUAL "NetBSD") + set(MD5SUM "md5 -n") +endif () + +if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + set(EXTRALIB "${EXTRALIB} -lm") + set(NO_EXPRECISION 1) +endif () + +if (${CMAKE_SYSTEM_NAME} STREQUAL "AIX") + set(EXTRALIB "${EXTRALIB} -lm") +endif () + +# TODO: this is probably meant for mingw, not other windows compilers +if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + + set(NEED_PIC 0) + set(NO_EXPRECISION 1) + + set(EXTRALIB "${EXTRALIB} -defaultlib:advapi32") + + # probably not going to use these + set(SUFFIX "obj") + set(PSUFFIX "pobj") + set(LIBSUFFIX "a") + + if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang") + set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI") + endif () + + if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") + + # Test for supporting MS_ABI + # removed string parsing in favor of CMake's version comparison -hpa + execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) + if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7) + # GCC Version >=4.7 + # It is compatible with MSVC ABI. 
+ set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI") + endif () + endif () + + # Ensure the correct stack alignment on Win32 + # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97 + if (${ARCH} STREQUAL "x86") + set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2") + set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2") + endif () + +endif () + +if (${CMAKE_SYSTEM_NAME} STREQUAL "Interix") + set(NEED_PIC 0) + set(NO_EXPRECISION 1) + + set(INTERIX_TOOL_DIR "/opt/gcc.3.3/i586-pc-interix3/bin") +endif () + +if (CYGWIN) + set(NEED_PIC 0) + set(NO_EXPRECISION 1) +endif () + +if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows" AND NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Interix") + if (SMP) + set(EXTRALIB "${EXTRALIB} -lpthread") + endif () +endif () + +if (QUAD_PRECISION) + set(CCOMMON_OPT "${CCOMMON_OPT} -DQUAD_PRECISION") + set(NO_EXPRECISION 1) +endif () + +if (${ARCH} STREQUAL "x86") + set(NO_EXPRECISION 1) +endif () + +if (UTEST_CHECK) + set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK") + set(SANITY_CHECK 1) +endif () + +if (SANITY_CHECK) + # TODO: need some way to get $(*F) (target filename) + set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK -DREFNAME=$(*F)f${BU}") +endif () + From 0beea3a5a5dc80df16aad88a5a8a37db349cb26c Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 3 Feb 2015 15:33:56 -0600 Subject: [PATCH 039/137] Converted LAPACK flags from Makefile.system. 
--- cmake/system.cmake | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/cmake/system.cmake b/cmake/system.cmake index 1f602e021..8acbb64c9 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -324,4 +324,38 @@ if (NOT DEFINED COMMON_OPT) set(COMMON_OPT "-O2") endif () +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}") +# TODO: not sure what PFLAGS is -hpa +set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}") + +set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}") +# TODO: not sure what FPFLAGS is -hpa +set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}") + +#For LAPACK Fortran codes. +set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}") +set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}") + +#Disable -fopenmp for LAPACK Fortran codes on Windows. +if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + message(STATUS "FFLAGS: ${LAPACK_FFLAGS}") + set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel") + foreach (FILTER_FLAG ${FILTER_FLAGS}) + string(REPLACE ${FILTER_FLAG} "" LAPACK_FFLAGS ${LAPACK_FFLAGS}) + string(REPLACE ${FILTER_FLAG} "" LAPACK_FPFLAGS ${LAPACK_FPFLAGS}) + endforeach () +endif () + +set(LAPACK_CFLAGS "${CMAKE_C_FLAGS} -DHAVE_LAPACK_CONFIG_H") +if (INTERFACE64) + set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_ILP64") +endif () + +if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DOPENBLAS_OS_WINDOWS") +endif () + +if (${CMAKE_C_COMPILER} STREQUAL "LSB") + set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE") +endif () From 84b3d760c4f9d63b4cbb9f48bd85469a5527a6a2 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 3 Feb 2015 16:05:01 -0600 Subject: [PATCH 040/137] Converted rest of Makefile.system to system.cmake. 
--- cmake/system.cmake | 164 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 163 insertions(+), 1 deletion(-) diff --git a/cmake/system.cmake b/cmake/system.cmake index 8acbb64c9..5ad80d618 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -338,7 +338,6 @@ set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}") #Disable -fopenmp for LAPACK Fortran codes on Windows. if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - message(STATUS "FFLAGS: ${LAPACK_FFLAGS}") set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel") foreach (FILTER_FLAG ${FILTER_FLAGS}) string(REPLACE ${FILTER_FLAG} "" LAPACK_FFLAGS ${LAPACK_FFLAGS}) @@ -359,3 +358,166 @@ if (${CMAKE_C_COMPILER} STREQUAL "LSB") set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE") endif () +if (NOT DEFINED SUFFIX) + set(SUFFIX o) +endif () + +if (NOT DEFINED PSUFFIX) + set(PSUFFIX po) +endif () + +if (NOT DEFINED LIBSUFFIX) + set(LIBSUFFIX a) +endif () + +if (DYNAMIC_ARCH) + if (DEFINED SMP) + set(LIBNAME "${LIBPREFIX}p${REVISION}.${LIBSUFFIX}") + set(LIBNAME_P "${LIBPREFIX}p${REVISION}_p.${LIBSUFFIX}") + else () + set(LIBNAME "${LIBPREFIX}${REVISION}.${LIBSUFFIX}") + set(LIBNAME_P "${LIBPREFIX}${REVISION}_p.${LIBSUFFIX}") + endif () +else () + if (DEFINED SMP) + set(LIBNAME "${LIBPREFIX}_${LIBCORE}p${REVISION}.${LIBSUFFIX}") + set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}p${REVISION}_p.${LIBSUFFIX}") + else () + set(LIBNAME "${LIBPREFIX}_${LIBCORE}${REVISION}.${LIBSUFFIX}") + set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}${REVISION}_p.${LIBSUFFIX}") + endif () +endif () + + +set(LIBDLLNAME "${LIBPREFIX}.dll") +set(LIBSONAME "${LIBNAME}.${LIBSUFFIX}.so") +set(LIBDYNNAME "${LIBNAME}.${LIBSUFFIX}.dylib") +set(LIBDEFNAME "${LIBNAME}.${LIBSUFFIX}.def") +set(LIBEXPNAME "${LIBNAME}.${LIBSUFFIX}.exp") +set(LIBZIPNAME "${LIBNAME}.${LIBSUFFIX}.zip") + +set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}") +set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}") + + +set(LIB_COMPONENTS BLAS) +if (NOT NO_CBLAS) + set(LIB_COMPONENTS 
"${LIB_COMPONENTS} CBLAS") +endif () + +if (NOT NO_LAPACK) + set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACK") + if (NOT NO_LAPACKE) + set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE") + endif () +endif () + +if (ONLY_CBLAS) + set(LIB_COMPONENTS CBLAS) +endif () + +#export OSNAME +#export ARCH +#export CORE +#export LIBCORE +#export PGCPATH +#export CONFIG +#export CC +#export FC +#export BU +#export FU +#export NEED2UNDERSCORES +#export USE_THREAD +#export NUM_THREADS +#export NUM_CORES +#export SMP +#export MAKEFILE_RULE +#export NEED_PIC +#export BINARY +#export BINARY32 +#export BINARY64 +#export F_COMPILER +#export C_COMPILER +#export USE_OPENMP +#export CROSS +#export CROSS_SUFFIX +#export NOFORTRAN +#export NO_FBLAS +#export EXTRALIB +#export CEXTRALIB +#export FEXTRALIB +#export HAVE_SSE +#export HAVE_SSE2 +#export HAVE_SSE3 +#export HAVE_SSSE3 +#export HAVE_SSE4_1 +#export HAVE_SSE4_2 +#export HAVE_SSE4A +#export HAVE_SSE5 +#export HAVE_AVX +#export HAVE_VFP +#export HAVE_VFPV3 +#export HAVE_VFPV4 +#export HAVE_NEON +#export KERNELDIR +#export FUNCTION_PROFILE +#export TARGET_CORE +# +#export SGEMM_UNROLL_M +#export SGEMM_UNROLL_N +#export DGEMM_UNROLL_M +#export DGEMM_UNROLL_N +#export QGEMM_UNROLL_M +#export QGEMM_UNROLL_N +#export CGEMM_UNROLL_M +#export CGEMM_UNROLL_N +#export ZGEMM_UNROLL_M +#export ZGEMM_UNROLL_N +#export XGEMM_UNROLL_M +#export XGEMM_UNROLL_N +#export CGEMM3M_UNROLL_M +#export CGEMM3M_UNROLL_N +#export ZGEMM3M_UNROLL_M +#export ZGEMM3M_UNROLL_N +#export XGEMM3M_UNROLL_M +#export XGEMM3M_UNROLL_N + + +#if (USE_CUDA) +# export CUDADIR +# export CUCC +# export CUFLAGS +# export CULIB +#endif + +#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f +# +#.f.$(SUFFIX): +# $(FC) $(FFLAGS) -c $< -o $(@F) +# +#.f.$(PSUFFIX): +# $(FC) $(FPFLAGS) -pg -c $< -o $(@F) + +# these are not cross-platform +#ifdef BINARY64 +#PATHSCALEPATH = /opt/pathscale/lib/3.1 +#PGIPATH = /opt/pgi/linux86-64/7.1-5/lib +#else +#PATHSCALEPATH = /opt/pathscale/lib/3.1/32 +#PGIPATH = 
/opt/pgi/linux86/7.1-5/lib +#endif + +#ACMLPATH = /opt/acml/4.3.0 +#ifneq ($(OSNAME), Darwin) +#MKLPATH = /opt/intel/mkl/10.2.2.025/lib +#else +#MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib +#endif +#ATLASPATH = /opt/atlas/3.9.17/opteron +#FLAMEPATH = $(HOME)/flame/lib +#ifneq ($(OSNAME), SunOS) +#SUNPATH = /opt/sunstudio12.1 +#else +#SUNPATH = /opt/SUNWspro +#endif + From a0aeda6187f38d8447edeb811546519aa8b04c2f Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 4 Feb 2015 10:37:34 -0600 Subject: [PATCH 041/137] Added function to set defines for the object names (e.g. -DNAME=dgemm). --- CMakeLists.txt | 2 +- cmake/c_check.cmake | 2 +- cmake/f_check.cmake | 3 +-- cmake/system.cmake | 2 +- cmake/utils.cmake | 32 ++++++++++++++++++++++++++++++++ interface/CMakeLists.txt | 9 +++++---- 6 files changed, 41 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0869e6fad..aae9c60fd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ set(OpenBLAS_PATCH_VERSION 13) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") # is this necessary? lapack-netlib has its own fortran checks in its CMakeLists.txt -#enable_language(Fortran) +enable_language(Fortran) message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with.") diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake index 961bb00c4..2e5ce5edc 100644 --- a/cmake/c_check.cmake +++ b/cmake/c_check.cmake @@ -26,7 +26,7 @@ # N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables. 
-# TODO: detect NEED_FU +# TODO: detect NEED_FU/FU set(NEED_FU 1) # Convert CMake vars into the format that OpenBLAS expects diff --git a/cmake/f_check.cmake b/cmake/f_check.cmake index 53552083b..266cdbb2a 100644 --- a/cmake/f_check.cmake +++ b/cmake/f_check.cmake @@ -27,8 +27,7 @@ if (NOT ONLY_CBLAS) # execute_process(COMMAND perl f_check ${TARGET_MAKE} ${TARGET_CONF} ${CMAKE_Fortran_COMPILER} # WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) - # TODO: is BU makefile macro needed? - # TODO: detect whether underscore needed, set #defines appropriately - use try_compile + # TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile # TODO: set FEXTRALIB flags a la f_check? set(BU "_") diff --git a/cmake/system.cmake b/cmake/system.cmake index 5ad80d618..c17b7502e 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -251,7 +251,7 @@ set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}") # include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake # TODO: Need to figure out how to get $(*F) in cmake -set(CCOMMON_OPT "${CCOMMON_OPT} -DASMNAME=${FU}$(*F) -DASMFNAME=${FU}$(*F)${BU} -DNAME=$(*F)${BU} -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)${BU}\" -DCHAR_CNAME=\"$(*F)\"") +#set(CCOMMON_OPT "${CCOMMON_OPT} -DASMNAME=${FU}$(*F) -DASMFNAME=${FU}$(*F)${BU} -DNAME=$(*F)${BU} -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)${BU}\" -DCHAR_CNAME=\"$(*F)\"") if (${CORE} STREQUAL "PPC440") set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC") diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 944e24cc4..6cee74974 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -88,3 +88,35 @@ function(GenerateObjects sources_in defines_in all_defines_in) set(OBJ_LIST_OUT ${OBJ_LIST_OUT} PARENT_SCOPE) endfunction () +# generates object files for each of the sources, using the BLAS naming scheme to pass the funciton name as a preprocessor definition +# @param sources_in the source files to build from +# @param float_type_in the float type to define for this build (e.g. 
SINGLE/DOUBLE/etc) +# @param defines_in (optional) preprocessor definitions that will be applied to all objects +function(GenerateNamedObjects sources_in float_type_in defines_in) + set(OBJ_LIST_OUT "") + foreach (source_file ${sources_in}) + + get_filename_component(source_name ${source_file} NAME_WE) + + string(SUBSTRING ${float_type_in} 0 1 float_char) + string(TOLOWER ${float_char} float_char) + + # build a unique variable name for this obj file by picking two letters from the defines (can't use one in this case) + set(obj_name "${float_char}${source_name}") + + # parse file name + string(REGEX MATCH "^[a-zA-Z_0-9]+" source_name ${source_file}) + string(TOUPPER ${source_name} source_name) + + # now add the object and set the defines + add_library(${obj_name} OBJECT ${source_file}) + set(obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"") + list(APPEND obj_defines ${defines_in}) + list(APPEND obj_defines ${float_type_in}) + set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${obj_defines}") + + list(APPEND OBJ_LIST_OUT ${obj_name}) + + endforeach () + set(OBJ_LIST_OUT ${OBJ_LIST_OUT} PARENT_SCOPE) +endfunction () diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 6ef498cb5..6082c55e3 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -39,15 +39,16 @@ if (NOT DEFINED NO_FBLAS) set_target_properties(MIN_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_MIN") add_library(MAX_OBJ OBJECT max.c) - add_library(DBLAS1OBJS OBJECT ${BLAS1_SOURCES}) - add_library(DBLAS2OBJS OBJECT ${BLAS2_SOURCES}) - add_library(DBLAS3OBJS OBJECT ${BLAS3_SOURCES}) + GenerateNamedObjects("${BLAS1_SOURCES}" "DOUBLE" "") + GenerateNamedObjects("${BLAS2_SOURCES}" "DOUBLE" "") + GenerateNamedObjects("${BLAS3_SOURCES}" "DOUBLE" "") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # trmm is trsm with a compiler flag set add_library(TRMM_OBJ 
OBJECT trsm.c) set_target_properties(TRMM_OBJ PROPERTIES COMPILE_DEFINITIONS "TRMM") - list(APPEND DBLAS_OBJS "DBLAS1OBJS;DBLAS2OBJS;DBLAS3OBJS;AMAX_OBJ;AMIN_OBJ;MIN_OBJ;MAX_OBJ;TRMM_OBJ") + list(APPEND DBLAS_OBJS "AMAX_OBJ;AMIN_OBJ;MIN_OBJ;MAX_OBJ;TRMM_OBJ") endif () if (NOT DEFINED NO_CBLAS) From 5690cf3f0e26256dbe228e4dd6a419fdbf41e43e Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 4 Feb 2015 10:52:19 -0600 Subject: [PATCH 042/137] Added override for function names in GenerateNamedObjects. The BLAS interface folder should now be generated the correct objects for the DOUBLE case. --- cmake/utils.cmake | 23 +++++++++++++--------- interface/CMakeLists.txt | 41 +++++++++++++++++++++++++--------------- 2 files changed, 40 insertions(+), 24 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 6cee74974..d02ee1a41 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -92,21 +92,26 @@ endfunction () # @param sources_in the source files to build from # @param float_type_in the float type to define for this build (e.g. SINGLE/DOUBLE/etc) # @param defines_in (optional) preprocessor definitions that will be applied to all objects -function(GenerateNamedObjects sources_in float_type_in defines_in) +# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended. +# e.g. 
with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax" +function(GenerateNamedObjects sources_in float_type_in defines_in name_in) set(OBJ_LIST_OUT "") foreach (source_file ${sources_in}) - get_filename_component(source_name ${source_file} NAME_WE) - string(SUBSTRING ${float_type_in} 0 1 float_char) string(TOLOWER ${float_char} float_char) - # build a unique variable name for this obj file by picking two letters from the defines (can't use one in this case) - set(obj_name "${float_char}${source_name}") - - # parse file name - string(REGEX MATCH "^[a-zA-Z_0-9]+" source_name ${source_file}) - string(TOUPPER ${source_name} source_name) + if (NOT name_in) + get_filename_component(source_name ${source_file} NAME_WE) + set(obj_name "${float_char}${source_name}") + else () + # replace * with float_char + if (${name_in} MATCHES "\\*") + string(REPLACE "*" ${float_char} obj_name ${name_in}) + else () + set(obj_name "${float_char}${name_in}") + endif () + endif () # now add the object and set the defines add_library(${obj_name} OBJECT ${source_file}) diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 6082c55e3..e2f073d37 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -30,25 +30,36 @@ set(BLAS3_SOURCES if (NOT DEFINED NO_FBLAS) - # N.B. The original Makefile passed in -UUSE_MIN and -UUSE_ABS (where appropriate), no way to do that at a source-level in cmake. REMOVE_DEFINITIONS removes a definition for the rest of the compilation. 
- add_library(AMAX_OBJ OBJECT max.c) - set_target_properties(AMAX_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_ABS") - add_library(AMIN_OBJ OBJECT max.c) - set_target_properties(AMIN_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_ABS;USE_MIN") - add_library(MIN_OBJ OBJECT max.c) - set_target_properties(MIN_OBJ PROPERTIES COMPILE_DEFINITIONS "USE_MIN") - add_library(MAX_OBJ OBJECT max.c) - - GenerateNamedObjects("${BLAS1_SOURCES}" "DOUBLE" "") - GenerateNamedObjects("${BLAS2_SOURCES}" "DOUBLE" "") - GenerateNamedObjects("${BLAS3_SOURCES}" "DOUBLE" "") + GenerateNamedObjects("${BLAS1_SOURCES}" "DOUBLE" "" "") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("${BLAS2_SOURCES}" "DOUBLE" "" "") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("${BLAS3_SOURCES}" "DOUBLE" "" "") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # trmm is trsm with a compiler flag set - add_library(TRMM_OBJ OBJECT trsm.c) - set_target_properties(TRMM_OBJ PROPERTIES COMPILE_DEFINITIONS "TRMM") + GenerateNamedObjects("trsm.c" "DOUBLE" "TRMM" "trmm") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + + # max and imax are compiled 4 times + GenerateNamedObjects("max.c" "DOUBLE" "" "") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("max.c" "DOUBLE" "USE_ABS" "amax") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("max.c" "DOUBLE" "USE_ABS;USE_MIN" "amin") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("max.c" "DOUBLE" "USE_MIN" "min") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + + GenerateNamedObjects("imax.c" "DOUBLE" "" "i*max") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("imax.c" "DOUBLE" "USE_ABS" "i*amax") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("imax.c" "DOUBLE" "USE_ABS;USE_MIN" "i*amin") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("imax.c" "DOUBLE" "USE_MIN" "i*min") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - list(APPEND DBLAS_OBJS 
"AMAX_OBJ;AMIN_OBJ;MIN_OBJ;MAX_OBJ;TRMM_OBJ") endif () if (NOT DEFINED NO_CBLAS) From 58cff2fed8d5ba02862f870745395684dac0b4f1 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 4 Feb 2015 11:30:15 -0600 Subject: [PATCH 043/137] Added CBLAS define/naming convention to GenerateNamedObjects. --- cmake/utils.cmake | 14 +++++-- interface/CMakeLists.txt | 91 ++++++++++++++++++---------------------- 2 files changed, 52 insertions(+), 53 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index d02ee1a41..f839245b1 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -94,7 +94,7 @@ endfunction () # @param defines_in (optional) preprocessor definitions that will be applied to all objects # @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended. # e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax" -function(GenerateNamedObjects sources_in float_type_in defines_in name_in) +function(GenerateNamedObjects sources_in float_type_in defines_in name_in use_cblas) set(OBJ_LIST_OUT "") foreach (source_file ${sources_in}) @@ -114,10 +114,18 @@ function(GenerateNamedObjects sources_in float_type_in defines_in name_in) endif () # now add the object and set the defines - add_library(${obj_name} OBJECT ${source_file}) - set(obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"") + set(obj_defines ${defines_in}) + + if (use_cblas) + set(obj_name "cblas_${obj_name}") + list(APPEND obj_defines "CBLAS") + endif () + + list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"") list(APPEND obj_defines ${defines_in}) list(APPEND obj_defines ${float_type_in}) + + 
add_library(${obj_name} OBJECT ${source_file}) set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${obj_defines}") list(APPEND OBJ_LIST_OUT ${obj_name}) diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index e2f073d37..90fca8b8f 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -27,68 +27,59 @@ set(BLAS3_SOURCES omatcopy.c imatcopy.c ) +# generate the BLAS objs once with and once without cblas +set (CBLAS_FLAGS "") if (NOT DEFINED NO_FBLAS) - - GenerateNamedObjects("${BLAS1_SOURCES}" "DOUBLE" "" "") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${BLAS2_SOURCES}" "DOUBLE" "" "") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${BLAS3_SOURCES}" "DOUBLE" "" "") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - - # trmm is trsm with a compiler flag set - GenerateNamedObjects("trsm.c" "DOUBLE" "TRMM" "trmm") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - - # max and imax are compiled 4 times - GenerateNamedObjects("max.c" "DOUBLE" "" "") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("max.c" "DOUBLE" "USE_ABS" "amax") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("max.c" "DOUBLE" "USE_ABS;USE_MIN" "amin") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("max.c" "DOUBLE" "USE_MIN" "min") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - - GenerateNamedObjects("imax.c" "DOUBLE" "" "i*max") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("imax.c" "DOUBLE" "USE_ABS" "i*amax") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("imax.c" "DOUBLE" "USE_ABS;USE_MIN" "i*amin") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("imax.c" "DOUBLE" "USE_MIN" "i*min") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - + list(APPEND CBLAS_FLAGS 0) endif () if (NOT DEFINED NO_CBLAS) - - add_library(ISAMAX_OBJ OBJECT imax.c) - set_target_properties(ISAMAX_OBJ PROPERTIES COMPILE_DEFINITIONS "CBLAS;USE_ABS") - - 
add_library(CDBLAS1_OBJS OBJECT ${BLAS1_SOURCES}) - add_library(CDBLAS2_OBJS OBJECT ${BLAS2_SOURCES}) - add_library(CDBLAS3_OBJS OBJECT ${BLAS3_SOURCES}) - - # trmm is trsm with a compiler flag set - add_library(CTRMM_OBJ OBJECT trsm.c) - set_target_properties(CTRMM_OBJ PROPERTIES COMPILE_DEFINITIONS "CBLAS;TRMM") - - set_target_properties(CDBLAS1_OBJS PROPERTIES COMPILE_DEFINITIONS "CBLAS") - set_target_properties(CDBLAS2_OBJS PROPERTIES COMPILE_DEFINITIONS "CBLAS") - set_target_properties(CDBLAS3_OBJS PROPERTIES COMPILE_DEFINITIONS "CBLAS") - - list(APPEND DBLAS_OBJS "CDBLAS1_OBJS;CDBLAS2_OBJS;CDBLAS3_OBJS;ISAMAX_OBJ;CTRMM_OBJ") + list(APPEND CBLAS_FLAGS 1) endif () +foreach (CBLAS_FLAG ${CBLAS_FLAGS}) + + GenerateNamedObjects("${BLAS1_SOURCES}" "DOUBLE" "" "" ${CBLAS_FLAG}) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("${BLAS2_SOURCES}" "DOUBLE" "" "" ${CBLAS_FLAG}) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("${BLAS3_SOURCES}" "DOUBLE" "" "" ${CBLAS_FLAG}) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + + # trmm is trsm with a compiler flag set + GenerateNamedObjects("trsm.c" "DOUBLE" "TRMM" "trmm" ${CBLAS_FLAG}) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + + # max and imax are compiled 4 times + GenerateNamedObjects("max.c" "DOUBLE" "" "" ${CBLAS_FLAG}) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("max.c" "DOUBLE" "USE_ABS" "amax" ${CBLAS_FLAG}) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("max.c" "DOUBLE" "USE_ABS;USE_MIN" "amin" ${CBLAS_FLAG}) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("max.c" "DOUBLE" "USE_MIN" "min" ${CBLAS_FLAG}) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + + GenerateNamedObjects("imax.c" "DOUBLE" "" "i*max" ${CBLAS_FLAG}) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("imax.c" "DOUBLE" "USE_ABS" "i*amax" ${CBLAS_FLAG}) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("imax.c" "DOUBLE" "USE_ABS;USE_MIN" 
"i*amin" ${CBLAS_FLAG}) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("imax.c" "DOUBLE" "USE_MIN" "i*min" ${CBLAS_FLAG}) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +endforeach () + if (NOT DEFINED NO_LAPACK) - add_library(DLAPACK_OBJS OBJECT + set(LAPACK_SOURCES lapack/getrf.c lapack/getrs.c lapack/potrf.c lapack/getf2.c lapack/potf2.c lapack/laswp.c lapack/gesv.c lapack/lauu2.c lapack/lauum.c lapack/trti2.c lapack/trtri.c ) - list(APPEND DBLAS_OBJS "DLAPACK_OBJS") + GenerateNamedObjects("${LAPACK_SOURCES}" "DOUBLE" "" "" 0) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endif () set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS From 2828f6630c847904973106133fc0e897807a511e Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 4 Feb 2015 14:01:36 -0600 Subject: [PATCH 044/137] Added SMP sources to COMMONOBJS. --- cmake/system.cmake | 3 --- driver/others/CMakeLists.txt | 51 ++++++++++++++++++------------------ 2 files changed, 25 insertions(+), 29 deletions(-) diff --git a/cmake/system.cmake b/cmake/system.cmake index c17b7502e..2a0678f83 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -250,9 +250,6 @@ set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}") # TODO: nead to convert these Makefiles # include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake -# TODO: Need to figure out how to get $(*F) in cmake -#set(CCOMMON_OPT "${CCOMMON_OPT} -DASMNAME=${FU}$(*F) -DASMFNAME=${FU}$(*F)${BU} -DNAME=$(*F)${BU} -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)${BU}\" -DCHAR_CNAME=\"$(*F)\"") - if (${CORE} STREQUAL "PPC440") set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC") endif () diff --git a/driver/others/CMakeLists.txt b/driver/others/CMakeLists.txt index a28cf1e79..0d2a2f486 100644 --- a/driver/others/CMakeLists.txt +++ b/driver/others/CMakeLists.txt @@ -6,8 +6,32 @@ else () set(MEMORY memory.c) endif () +if (SMP) + + if (USE_OPENMP) + set(BLAS_SERVER blas_server_omp.c) + elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + 
set(BLAS_SERVER blas_server_win32.c) + endif () + + if (NOT DEFINED BLAS_SERVER) + set(BLAS_SERVER blas_server.c) + endif () + + set(SMP_SOURCES + ${BLAS_SERVER} + divtable.c # TODO: Makefile has -UDOUBLE + blas_l1_thread.c + ) + + if (NOT NO_AFFINITY) + list(APPEND SMP_SOURCES init.c) + endif () +endif () + add_library(COMMON_OBJS OBJECT ${MEMORY} + ${SMP_SOURCES} xerbla.c abs.c # TODO: this is split into c_abs (DOUBLE unset) and z_abs (DOUBLE set) in the Makefile openblas_set_num_threads.c @@ -16,13 +40,6 @@ add_library(COMMON_OBJS OBJECT openblas_error_handle.c ) -#ifdef SMP -#COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) -#ifndef NO_AFFINITY -#COMMONOBJS += init.$(SUFFIX) -#endif -#endif -# #ifeq ($(DYNAMIC_ARCH), 1) #COMMONOBJS += dynamic.$(SUFFIX) #else @@ -48,25 +65,7 @@ add_library(COMMON_OBJS OBJECT list(APPEND DBLAS_OBJS "COMMON_OBJS") #LIBOTHERS = libothers.$(LIBSUFFIX) -# -#ifeq ($(USE_OPENMP), 1) -#BLAS_SERVER = blas_server_omp.c -#else -#ifeq ($(OSNAME), WINNT) -#BLAS_SERVER = blas_server_win32.c -#endif -#ifeq ($(OSNAME), CYGWIN_NT) -#BLAS_SERVER = blas_server_win32.c -#endif -#ifeq ($(OSNAME), Interix) -#BLAS_SERVER = blas_server_win32.c -#endif -#endif -# -#ifndef BLAS_SERVER -#BLAS_SERVER = blas_server.c -#endif -# + #ifeq ($(DYNAMIC_ARCH), 1) #HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) #else From 373a1bdadb2737663a9f43e0644cf5cf3ec953cb Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 4 Feb 2015 15:47:10 -0600 Subject: [PATCH 045/137] Converted lapack/Makefile to cmake. 
--- CMakeLists.txt | 6 ++-- lapack/CMakeLists.txt | 83 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 lapack/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index aae9c60fd..eb15fa4bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ if (DEFINED SANITY_CHECK) endif () set(SUBDIRS ${BLASDIRS}) -if (NOT ${NO_LAPACK}) +if (NOT NO_LAPACK) list(APPEND SUBDIRS lapack) endif () @@ -55,8 +55,8 @@ if (${NO_STATIC} AND ${NO_SHARED}) endif () set(DBLAS_OBJS "") -foreach (BLAS_DIR ${BLASDIRS}) - add_subdirectory(${BLAS_DIR}) +foreach (SUBDIR ${SUBDIRS}) + add_subdirectory(${SUBDIR}) endforeach () # get obj vars into format that add_library likes: $ (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt new file mode 100644 index 000000000..c6d051c76 --- /dev/null +++ b/lapack/CMakeLists.txt @@ -0,0 +1,83 @@ + +include_directories(${CMAKE_SOURCE_DIR}) + +# TODO: laswp needs arch specific code +# TODO: getrs needs to be compiled with and without TRANS (and up to TRANS=4 in the complex case) +# TODO: trti2 needs to be compiled with and without UNIT +# TODO: trtri needs to be compiled with and without UNIT + +set(LAPACK_SOURCES + getf2/getf2_k.c + getrf/getrf_single.c + getrs/getrs_single.c + potrf/potrf_U_single.c + potrf/potrf_L_single.c + potf2/potf2_U.c + potf2/potf2_L.c + lauu2/lauu2_U.c + lauu2/lauu2_L.c + lauum/lauum_U_single.c + lauum/lauum_L_single.c + trti2/trti2_U.c + trti2/trti2_L.c + trtri/trtri_U_single.c + trtri/trtri_L_single.c +) + +set(ZLAPACK_SOURCES + getf2/zgetf2_k.c + getrf/getrf_single.c + getrs/zgetrs_single.c + potrf/potrf_U_single.c + potrf/potrf_L_single.c + potf2/potf2_U.c + potf2/potf2_L.c + lauu2/zlauu2_U.c + lauu2/zlauu2_L.c + lauum/lauum_U_single.c + lauum/lauum_L_single.c + trti2/ztrti2_U.c + trti2/ztrti2_L.c + trtri/trtri_U_single.c + trtri/trtri_L_single.c +) + 
+GenerateNamedObjects("${LAPACK_SOURCES}" "DOUBLE" "" "" 0) +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +if (SMP) + + if (USE_OPENMP) + set(GETRF_SRC getrf/getrf_parallel_omp.c) + else () + set(GETRF_SRC getrf/getrf_parallel.c) + endif () + + set(PARALLEL_SOURCES + ${GETRF_SRC} + getrs/getrs_parallel.c + potrf/potrf_U_parallel.c + potrf/potrf_L_parallel.c + lauum/lauum_U_parallel.c + lauum/lauum_L_parallel.c + trtri/trtri_U_parallel.c + trtri/trtri_L_parallel.c + ) + + set(ZPARALLEL_SOURCES + ${GETRF_SRC} + getrs/zgetrs_parallel.c + potrf/potrf_U_parallel.c + potrf/potrf_L_parallel.c + lauum/lauum_U_parallel.c + lauum/lauum_L_parallel.c + trtri/trtri_U_parallel.c + trtri/trtri_L_parallel.c + ) + + GenerateNamedObjects("${PARALLEL_SOURCES}" "DOUBLE" "" "" 0) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +endif () + +set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS + From 0d7bad1f35b8672f37fd178a5dc5d21868270860 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 5 Feb 2015 09:02:54 -0600 Subject: [PATCH 046/137] Changed GenerateObjects to append combination codes (e.g. dtrmm_TU). --- cmake/utils.cmake | 127 ++++++++++++++++++++++------------- driver/level2/CMakeLists.txt | 2 + driver/level3/CMakeLists.txt | 27 ++++++-- 3 files changed, 101 insertions(+), 55 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index f839245b1..b0c108bbc 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -15,6 +15,8 @@ endfunction () # Returns all combinations of the input list, as a list with colon-separated combinations # E.g. input of A B C returns A B C A:B A:C B:C # N.B. The input is meant to be a list, and to past a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")). 
+# @returns LIST_OUT a list of combinations +# CODES_OUT a list of codes corresponding to each combination, with N meaning the item is not present, and the first letter of the list item meaning it is presen function(AllCombinations list_in) list(LENGTH list_in list_count) set(num_combos 1) @@ -22,7 +24,10 @@ function(AllCombinations list_in) math(EXPR num_combos "(${num_combos} << ${list_count}) - 1") set(LIST_OUT "") foreach (c RANGE 0 ${num_combos}) + set(current_combo "") + set(current_code "") + # this is a little ridiculous just to iterate through a list w/ indices math(EXPR last_list_index "${list_count} - 1") foreach (list_index RANGE 0 ${last_list_index}) @@ -35,57 +40,24 @@ function(AllCombinations list_in) else () set(current_combo ${list_elem}) endif () - endif () - endforeach () - list(APPEND LIST_OUT ${current_combo}) - endforeach () - list(APPEND LIST_OUT " ") # Empty set is a valic combination, but CMake isn't appending the empty string for some reason, use a space - set(LIST_OUT ${LIST_OUT} PARENT_SCOPE) -endfunction () - -# generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in -# @param sources_in the source files to build from -# @param defines_in the preprocessor definitions that will be combined to create the object files -# @param all_defines_in (optional) preprocessor definitions that will be applied to all objects -function(GenerateObjects sources_in defines_in all_defines_in) - AllCombinations("${defines_in}") - set(define_combos ${LIST_OUT}) - set(OBJ_LIST_OUT "") - foreach (source_file ${sources_in}) - foreach (def_combo ${define_combos}) - - # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with - string(REPLACE ":" ";" def_combo ${def_combo}) - - # build a unique variable name for this obj file by picking two letters from the defines (can't use one in this case) - set(obj_name "") - foreach (combo_elem ${def_combo}) - 
string(REGEX MATCH "^[A-Z][A-Z]" letter ${combo_elem}) - set(obj_name "${obj_name}${letter}") - endforeach () - - # parse file name - string(REGEX MATCH "^[a-zA-Z_0-9]+" source_name ${source_file}) - string(TOUPPER ${source_name} source_name) - - # prepend the uppercased file name to the obj name - set(obj_name "${source_name}_${obj_name}_OBJS") - - # now add the object and set the defines - add_library(${obj_name} OBJECT ${source_file}) - set(cur_defines ${def_combo}) - if ("${cur_defines}" STREQUAL " ") - set(cur_defines ${all_defines_in}) + string(SUBSTRING ${list_elem} 0 1 code_char) else () - list(APPEND cur_defines ${all_defines_in}) + set(code_char "N") endif () - if (cur_defines AND NOT "${cur_defines}" STREQUAL " ") # using space as the empty set - set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${cur_defines}") - endif () - list(APPEND OBJ_LIST_OUT ${obj_name}) + set(current_code "${current_code}${code_char}") endforeach () + + if (current_combo STREQUAL "") + list(APPEND LIST_OUT " ") # Empty set is a valid combination, but CMake isn't appending the empty string for some reason, use a space + else () + list(APPEND LIST_OUT ${current_combo}) + endif () + list(APPEND CODES_OUT ${current_code}) + endforeach () - set(OBJ_LIST_OUT ${OBJ_LIST_OUT} PARENT_SCOPE) + + set(LIST_OUT ${LIST_OUT} PARENT_SCOPE) + set(CODES_OUT ${CODES_OUT} PARENT_SCOPE) endfunction () # generates object files for each of the sources, using the BLAS naming scheme to pass the funciton name as a preprocessor definition @@ -94,7 +66,10 @@ endfunction () # @param defines_in (optional) preprocessor definitions that will be applied to all objects # @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended. # e.g. 
with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax" -function(GenerateNamedObjects sources_in float_type_in defines_in name_in use_cblas) +# @param replace_k_with replaces the "k" in the filename with this string (e.g. symm_k should be symm_TU) +# @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters) +function(GenerateNamedObjects sources_in float_type_in defines_in name_in use_cblas replace_k_with append_with) + set(OBJ_LIST_OUT "") foreach (source_file ${sources_in}) @@ -113,6 +88,12 @@ function(GenerateNamedObjects sources_in float_type_in defines_in name_in use_cb endif () endif () + if (replace_k_with) + string(REGEX REPLACE "k$" ${replace_k_with} obj_name ${obj_name}) + else () + set(obj_name "${obj_name}${append_with}") + endif () + # now add the object and set the defines set(obj_defines ${defines_in}) @@ -133,3 +114,53 @@ function(GenerateNamedObjects sources_in float_type_in defines_in name_in use_cb endforeach () set(OBJ_LIST_OUT ${OBJ_LIST_OUT} PARENT_SCOPE) endfunction () + +# generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in +# @param sources_in the source files to build from +# @param defines_in the preprocessor definitions that will be combined to create the object files +# @param float_type_in the float type to define for this build (e.g. SINGLE/DOUBLE/etc) +# @param replace_k Replace the "k" in the filename with the define combo letters (else it appends). E.g. symm_k with TRANS and UNIT defined will be symm_TU. 
+# @param all_defines_in (optional) preprocessor definitions that will be applied to all objects +function(GenerateCombinationObjects sources_in defines_in float_type_in all_defines_in replace_k) + + AllCombinations("${defines_in}") + set(define_combos ${LIST_OUT}) + set(define_codes ${CODES_OUT}) + + set(COMBO_OBJ_LIST_OUT "") + list(LENGTH define_combos num_combos) + math(EXPR num_combos "${num_combos} - 1") + + foreach (c RANGE 0 ${num_combos}) + + list(GET define_combos ${c} define_combo) + list(GET define_codes ${c} define_code) + + foreach (source_file ${sources_in}) + + # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with + string(REPLACE ":" ";" define_combo ${define_combo}) + + # now add the object and set the defines + set(cur_defines ${define_combo}) + if ("${cur_defines}" STREQUAL " ") + set(cur_defines ${all_defines_in}) + else () + list(APPEND cur_defines ${all_defines_in}) + endif () + + set(replace_k_name "") + set(append_name "") + if (replace_k) + set(replace_k_name ${define_code}) + else () + set(append_name ${define_code}) + endif () + + GenerateNamedObjects("${source_file}" "${float_type_in}" "${cur_defines}" "" 0 "${replace_k_name}" "${append_name}") + list(APPEND COMBO_OBJ_LIST_OUT ${obj_name}) + endforeach () + endforeach () + + set(COMBO_OBJ_LIST_OUT ${COMBO_OBJ_LIST_OUT} PARENT_SCOPE) +endfunction () diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index 1fbf7c729..bf0a5857e 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -1,6 +1,8 @@ include_directories(${CMAKE_SOURCE_DIR}) +# TODO: These all need NAME/CNAME set (see GenerateNamedObjects) + # sources that need to be compiled twice, once with no flags and once with LOWER set(UL_SOURCES sbmv_k.c diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 57865d18b..4427c8ebf 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -25,19 
+25,32 @@ endif () # loop through gemm.c defines set(GEMM_DEFINES NN NT TN TT) foreach (GEMM_DEFINE ${GEMM_DEFINES}) - add_library(GEMM_${GEMM_DEFINE}_OBJS OBJECT gemm.c) - set_target_properties(GEMM_${GEMM_DEFINE}_OBJS PROPERTIES COMPILE_DEFINITIONS ${GEMM_DEFINE}) - list(APPEND DBLAS_OBJS GEMM_${GEMM_DEFINE}_OBJS) + string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) + GenerateNamedObjects("gemm.c" "DOUBLE" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0 "" "") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endforeach () +AllCombinations("TRANS;UPPER;UNIT") +set(define_combos ${LIST_OUT}) +foreach (def_combo ${define_combos}) + # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with + string(REPLACE ":" ";" def_combo ${def_combo}) -GenerateObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "") + # build a unique variable name for this obj file by picking two letters from the defines (can't use one in this case) + set(obj_name "") + foreach (combo_elem ${def_combo}) + string(REGEX MATCH "^[A-Z][A-Z]" letter ${combo_elem}) + set(obj_name "${obj_name}${letter}") + endforeach () +endforeach () + +GenerateCombinationObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "DOUBLE" "" 0) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateObjects("symm_k.c" "LOWER;RSIDE" "NN") +GenerateCombinationObjects("symm_k.c" "LOWER;RSIDE" "DOUBLE" "NN" 1) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "") +GenerateCombinationObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "DOUBLE" "" 1) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "") +GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "DOUBLE" "" 1) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) #if (SMP) From cfaf1c678f85c4b6428030069edea02f903764df Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 5 Feb 2015 09:17:18 -0600 Subject: [PATCH 047/137] Added option to 
append define codes with an underscore. Fixed the code array not getting reset on subsequent AllCombinations calls. --- cmake/utils.cmake | 12 +++++++++--- driver/level3/CMakeLists.txt | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index b0c108bbc..276375740 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -23,6 +23,7 @@ function(AllCombinations list_in) # subtract 1 since we will iterate from 0 to num_combos math(EXPR num_combos "(${num_combos} << ${list_count}) - 1") set(LIST_OUT "") + set(CODES_OUT "") foreach (c RANGE 0 ${num_combos}) set(current_combo "") @@ -119,8 +120,8 @@ endfunction () # @param sources_in the source files to build from # @param defines_in the preprocessor definitions that will be combined to create the object files # @param float_type_in the float type to define for this build (e.g. SINGLE/DOUBLE/etc) -# @param replace_k Replace the "k" in the filename with the define combo letters (else it appends). E.g. symm_k with TRANS and UNIT defined will be symm_TU. # @param all_defines_in (optional) preprocessor definitions that will be applied to all objects +# @param replace_k If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k with TRANS and UNIT defined will be symm_TU. If 0, appends, or if 2 appends with an underscore. 
function(GenerateCombinationObjects sources_in defines_in float_type_in all_defines_in replace_k) AllCombinations("${defines_in}") @@ -151,10 +152,14 @@ function(GenerateCombinationObjects sources_in defines_in float_type_in all_defi set(replace_k_name "") set(append_name "") - if (replace_k) + if (replace_k EQUAL 1) set(replace_k_name ${define_code}) else () - set(append_name ${define_code}) + if (replace_k EQUAL 2) + set(append_name "_${define_code}") + else () + set(append_name ${define_code}) + endif () endif () GenerateNamedObjects("${source_file}" "${float_type_in}" "${cur_defines}" "" 0 "${replace_k_name}" "${append_name}") @@ -164,3 +169,4 @@ function(GenerateCombinationObjects sources_in defines_in float_type_in all_defi set(COMBO_OBJ_LIST_OUT ${COMBO_OBJ_LIST_OUT} PARENT_SCOPE) endfunction () + diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 4427c8ebf..02a6097e3 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -50,7 +50,7 @@ GenerateCombinationObjects("symm_k.c" "LOWER;RSIDE" "DOUBLE" "NN" 1) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateCombinationObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "DOUBLE" "" 1) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "DOUBLE" "" 1) +GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "DOUBLE" "" 2) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) #if (SMP) From 461e6911270e0cfbe917720284984d23c4ae8fce Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 5 Feb 2015 09:23:47 -0600 Subject: [PATCH 048/137] Codes when define is absent are now a parameter to AllCombinations. The level3 object names should now be correct. 
--- cmake/utils.cmake | 9 +++++---- driver/level3/CMakeLists.txt | 22 ++++------------------ 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 276375740..075c0ccc2 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -15,9 +15,10 @@ endfunction () # Returns all combinations of the input list, as a list with colon-separated combinations # E.g. input of A B C returns A B C A:B A:C B:C # N.B. The input is meant to be a list, and to past a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")). +# #param absent_codes codes to use when an element is absent from a combination. For example, if you have TRANS;UNIT;UPPER you may want the code to be NNL when nothing is present. # @returns LIST_OUT a list of combinations # CODES_OUT a list of codes corresponding to each combination, with N meaning the item is not present, and the first letter of the list item meaning it is presen -function(AllCombinations list_in) +function(AllCombinations list_in absent_codes_in) list(LENGTH list_in list_count) set(num_combos 1) # subtract 1 since we will iterate from 0 to num_combos @@ -43,7 +44,7 @@ function(AllCombinations list_in) endif () string(SUBSTRING ${list_elem} 0 1 code_char) else () - set(code_char "N") + list(GET absent_codes_in ${list_index} code_char) endif () set(current_code "${current_code}${code_char}") endforeach () @@ -122,9 +123,9 @@ endfunction () # @param float_type_in the float type to define for this build (e.g. SINGLE/DOUBLE/etc) # @param all_defines_in (optional) preprocessor definitions that will be applied to all objects # @param replace_k If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k with TRANS and UNIT defined will be symm_TU. If 0, appends, or if 2 appends with an underscore. 
-function(GenerateCombinationObjects sources_in defines_in float_type_in all_defines_in replace_k) +function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_type_in all_defines_in replace_k) - AllCombinations("${defines_in}") + AllCombinations("${defines_in}" "${absent_codes_in}") set(define_combos ${LIST_OUT}) set(define_codes ${CODES_OUT}) diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 02a6097e3..d9d4da709 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -30,27 +30,13 @@ foreach (GEMM_DEFINE ${GEMM_DEFINES}) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endforeach () -AllCombinations("TRANS;UPPER;UNIT") -set(define_combos ${LIST_OUT}) -foreach (def_combo ${define_combos}) - # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with - string(REPLACE ":" ";" def_combo ${def_combo}) - - # build a unique variable name for this obj file by picking two letters from the defines (can't use one in this case) - set(obj_name "") - foreach (combo_elem ${def_combo}) - string(REGEX MATCH "^[A-Z][A-Z]" letter ${combo_elem}) - set(obj_name "${obj_name}${letter}") - endforeach () -endforeach () - -GenerateCombinationObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "DOUBLE" "" 0) +GenerateCombinationObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "N;L;N" "DOUBLE" "" 0) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateCombinationObjects("symm_k.c" "LOWER;RSIDE" "DOUBLE" "NN" 1) +GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "DOUBLE" "NN" 1) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateCombinationObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "DOUBLE" "" 1) +GenerateCombinationObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "U;N" "DOUBLE" "" 1) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "DOUBLE" "" 2) 
+GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "U" "DOUBLE" "" 2) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) #if (SMP) From 1b62a4f3c9989edf0c4cfdefddb8621ac0638c24 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 5 Feb 2015 09:39:40 -0600 Subject: [PATCH 049/137] Changed some function parameters to optional. --- cmake/utils.cmake | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 075c0ccc2..26900514d 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -70,7 +70,15 @@ endfunction () # e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax" # @param replace_k_with replaces the "k" in the filename with this string (e.g. symm_k should be symm_TU) # @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters) -function(GenerateNamedObjects sources_in float_type_in defines_in name_in use_cblas replace_k_with append_with) +function(GenerateNamedObjects sources_in float_type_in defines_in name_in use_cblas) + + if (DEFINED ARGV5) + set(replace_k_with ${ARGV5}) + endif () + + if (DEFINED ARGV6) + set(append_with ${ARGV6}) + endif () set(OBJ_LIST_OUT "") foreach (source_file ${sources_in}) From 943fa2fb580a4301652820b75458ddd58718b3df Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 5 Feb 2015 10:49:11 -0600 Subject: [PATCH 050/137] Fixed object names in level2. 
--- cmake/utils.cmake | 41 +++++++++++++++++++++++------------- driver/level2/CMakeLists.txt | 35 ++++++++++-------------------- driver/level3/CMakeLists.txt | 10 ++++----- 3 files changed, 42 insertions(+), 44 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 26900514d..2a1d105e3 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -68,12 +68,12 @@ endfunction () # @param defines_in (optional) preprocessor definitions that will be applied to all objects # @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended. # e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax" -# @param replace_k_with replaces the "k" in the filename with this string (e.g. symm_k should be symm_TU) +# @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU) # @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters) function(GenerateNamedObjects sources_in float_type_in defines_in name_in use_cblas) if (DEFINED ARGV5) - set(replace_k_with ${ARGV5}) + set(replace_last_with ${ARGV5}) endif () if (DEFINED ARGV6) @@ -98,8 +98,8 @@ function(GenerateNamedObjects sources_in float_type_in defines_in name_in use_cb endif () endif () - if (replace_k_with) - string(REGEX REPLACE "k$" ${replace_k_with} obj_name ${obj_name}) + if (replace_last_with) + string(REGEX REPLACE ".$" ${replace_last_with} obj_name ${obj_name}) else () set(obj_name "${obj_name}${append_with}") endif () @@ -130,8 +130,11 @@ endfunction () # @param defines_in the preprocessor definitions that will be combined to create the object files # @param float_type_in the float type to define for this build (e.g. 
SINGLE/DOUBLE/etc) # @param all_defines_in (optional) preprocessor definitions that will be applied to all objects -# @param replace_k If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k with TRANS and UNIT defined will be symm_TU. If 0, appends, or if 2 appends with an underscore. -function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_type_in all_defines_in replace_k) +# @param replace_scheme If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k.c with TRANS and UNIT defined will be symm_TU. +# If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU. +# If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU. +# If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects). +function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_type_in all_defines_in replace_scheme) AllCombinations("${defines_in}" "${absent_codes_in}") set(define_combos ${LIST_OUT}) @@ -159,20 +162,28 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_ list(APPEND cur_defines ${all_defines_in}) endif () - set(replace_k_name "") - set(append_name "") - if (replace_k EQUAL 1) - set(replace_k_name ${define_code}) + set(replace_code "") + set(append_code "") + if (replace_scheme EQUAL 1) + set(replace_code ${define_code}) else () - if (replace_k EQUAL 2) - set(append_name "_${define_code}") + if (replace_scheme EQUAL 2) + set(append_code "_${define_code}") + elseif (replace_scheme EQUAL 3) + # first extract the last letter + string(REGEX MATCH "[a-zA-Z]\\." 
last_letter ${source_file}) + string(SUBSTRING ${last_letter} 0 1 last_letter) # remove period from match + # break the code up into the first letter and the remaining (should only be 2 anyway) + string(SUBSTRING ${define_code} 0 1 define_code_first) + string(SUBSTRING ${define_code} 1 -1 define_code_second) + set(replace_code "${define_code_first}${last_letter}${define_code_second}") else () - set(append_name ${define_code}) + set(append_code ${define_code}) # replace_scheme should be 0 endif () endif () - GenerateNamedObjects("${source_file}" "${float_type_in}" "${cur_defines}" "" 0 "${replace_k_name}" "${append_name}") - list(APPEND COMBO_OBJ_LIST_OUT ${obj_name}) + GenerateNamedObjects("${source_file}" "${float_type_in}" "${cur_defines}" "" 0 "${replace_code}" "${append_code}") + list(APPEND COMBO_OBJ_LIST_OUT "${OBJ_LIST_OUT}") endforeach () endforeach () diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index bf0a5857e..d596668c4 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -1,8 +1,6 @@ include_directories(${CMAKE_SOURCE_DIR}) -# TODO: These all need NAME/CNAME set (see GenerateNamedObjects) - # sources that need to be compiled twice, once with no flags and once with LOWER set(UL_SOURCES sbmv_k.c @@ -29,30 +27,20 @@ set(NU_SOURCES trsv_L.c ) -# first compile all the objects that don't need specific preprocessor defines -add_library(DBLAS_NONE OBJECT - gbmv_k.c # gbmv_N - ${UL_SOURCES} - ${NU_SOURCES} -) - -# then do objects with transpose/triangular/etc definitions - -# objects that need TRANS set -add_library(DBLAS_T OBJECT gbmv_k.c ${NU_SOURCES}) -set_target_properties(DBLAS_T PROPERTIES COMPILE_DEFINITIONS "TRANS") - # objects that need LOWER set -add_library(DBLAS_L OBJECT ${UL_SOURCES}) -set_target_properties(DBLAS_L PROPERTIES COMPILE_DEFINITIONS "LOWER") - -# objects that need UNIT set -add_library(DBLAS_U OBJECT ${NU_SOURCES}) -set_target_properties(DBLAS_U PROPERTIES COMPILE_DEFINITIONS 
"UNIT") +GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "DOUBLE" "" 1) +list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) # objects that need TRANS and UNIT set -add_library(DBLAS_TU OBJECT ${NU_SOURCES}) -set_target_properties(DBLAS_TU PROPERTIES COMPILE_DEFINITIONS "UNIT;TRANS") +# N.B. BLAS wants to put the U/L from the filename in the *MIDDLE* because of course why not have a different naming scheme for every single object -hpa +GenerateCombinationObjects("${NU_SOURCES}" "TRANS;UNIT" "N;N" "DOUBLE" "" 3) +list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) + +# gbmv uses a lowercase n and t. WHY? WHO KNOWS! +GenerateNamedObjects("gbmv_k.c" "DOUBLE" "" "gbmv_n" 0) +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("gbmv_k.c" "DOUBLE" "TRANS" "gbmv_t" 0) +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) #if (DEFINED SMP) # add_library(DBLASOBJS_SMP @@ -81,6 +69,5 @@ set_target_properties(DBLAS_TU PROPERTIES COMPILE_DEFINITIONS "UNIT;TRANS") # ) #endif () -list(APPEND DBLAS_OBJS "DBLAS_NONE;DBLAS_T;DBLAS_L;DBLAS_U;DBLAS_TU") set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index d9d4da709..61133ce92 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -26,18 +26,18 @@ endif () set(GEMM_DEFINES NN NT TN TT) foreach (GEMM_DEFINE ${GEMM_DEFINES}) string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) - GenerateNamedObjects("gemm.c" "DOUBLE" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0 "" "") + GenerateNamedObjects("gemm.c" "DOUBLE" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endforeach () GenerateCombinationObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "N;L;N" "DOUBLE" "" 0) -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "DOUBLE" "NN" 1) -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) 
+list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) GenerateCombinationObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "U;N" "DOUBLE" "" 1) -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "U" "DOUBLE" "" 2) -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) #if (SMP) # From 627d5e7401e1935b6faffa11654ab7e9c07f204d Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 5 Feb 2015 12:22:48 -0600 Subject: [PATCH 051/137] Added SMP objects to driver/level3. --- cmake/utils.cmake | 13 ++++++++++--- driver/level3/CMakeLists.txt | 31 ++++++++++++++++++------------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 2a1d105e3..286f271e2 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -83,8 +83,10 @@ function(GenerateNamedObjects sources_in float_type_in defines_in name_in use_cb set(OBJ_LIST_OUT "") foreach (source_file ${sources_in}) - string(SUBSTRING ${float_type_in} 0 1 float_char) - string(TOLOWER ${float_char} float_char) + if (NOT float_type_in STREQUAL "") + string(SUBSTRING ${float_type_in} 0 1 float_char) + string(TOLOWER ${float_char} float_char) + endif () if (NOT name_in) get_filename_component(source_name ${source_file} NAME_WE) @@ -134,8 +136,13 @@ endfunction () # If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU. # If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU. # If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects). 
+# @param alternate_name replaces the source name as the object name (define codes are still appended) function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_type_in all_defines_in replace_scheme) + if (DEFINED ARGV6) + set(alternate_name ${ARGV6}) + endif () + AllCombinations("${defines_in}" "${absent_codes_in}") set(define_combos ${LIST_OUT}) set(define_codes ${CODES_OUT}) @@ -182,7 +189,7 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_ endif () endif () - GenerateNamedObjects("${source_file}" "${float_type_in}" "${cur_defines}" "" 0 "${replace_code}" "${append_code}") + GenerateNamedObjects("${source_file}" "${float_type_in}" "${cur_defines}" "${alternate_name}" 0 "${replace_code}" "${append_code}") list(APPEND COMBO_OBJ_LIST_OUT "${OBJ_LIST_OUT}") endforeach () endforeach () diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 61133ce92..687664949 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -28,6 +28,10 @@ foreach (GEMM_DEFINE ${GEMM_DEFINES}) string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) GenerateNamedObjects("gemm.c" "DOUBLE" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) + GenerateNamedObjects("gemm.c" "DOUBLE" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + endif () endforeach () GenerateCombinationObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "N;L;N" "DOUBLE" "" 0) @@ -39,19 +43,20 @@ list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "U" "DOUBLE" "" 2) list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) -#if (SMP) -# -# COMMONOBJS += gemm_thread_m.c gemm_thread_n.c gemm_thread_mn.c gemm_thread_variable.c -# COMMONOBJS += syrk_thread.c -# -# if (USE_SIMPLE_THREADED_LEVEL3) -# DBLASOBJS += dgemm_thread_nn.c 
dgemm_thread_nt.c dgemm_thread_tn.c dgemm_thread_tt.c -# DBLASOBJS += dsymm_thread_LU.c dsymm_thread_LL.c dsymm_thread_RU.c dsymm_thread_RL.c -# DBLASOBJS += dsyrk_thread_UN.c dsyrk_thread_UT.c dsyrk_thread_LN.c dsyrk_thread_LT.c -# -# endif () -#endif () -# +if (SMP) + + # N.B. these do NOT have a float type (e.g. DOUBLE) defined! + GenerateNamedObjects("gemm_thread_m.c;gemm_thread_n.c;gemm_thread_mn.c;gemm_thread_variable.c;syrk_thread.c" "" "" "" 0) + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + + if (NOT USE_SIMPLE_THREADED_LEVEL3) + GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "DOUBLE" "THREADED_LEVEL3" 2 "syrk_thread") + list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) + GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "DOUBLE" "THREADED_LEVEL3;NN" 2 "symm_thread") + list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) + endif () +endif () + #HPLOBJS = # dgemm_nn.c dgemm_nt.c dgemm_tn.c dgemm_tt.c # dtrsm_LNUU.c dtrsm_LNUN.c dtrsm_LNLU.c dtrsm_LNLN.c From 189fadfde03fec13fec682d2c7bf003eced900d0 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 5 Feb 2015 21:05:11 -0600 Subject: [PATCH 052/137] Started implementing kernel/Makefile in cmake. --- CMakeLists.txt | 4 +- cmake/c_check.cmake | 6 +-- cmake/prebuild.cmake | 1 - kernel/CMakeLists.txt | 85 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 90 insertions(+), 6 deletions(-) create mode 100644 kernel/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index eb15fa4bc..41cb52b23 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,8 +9,8 @@ set(OpenBLAS_MINOR_VERSION 2) set(OpenBLAS_PATCH_VERSION 13) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") -# is this necessary? lapack-netlib has its own fortran checks in its CMakeLists.txt enable_language(Fortran) +enable_language(ASM) message(WARNING "CMake support is experimental. 
This will not produce the same Makefiles that OpenBLAS ships with.") @@ -19,7 +19,7 @@ include("${CMAKE_SOURCE_DIR}/cmake/system.cmake") set(BLASDIRS interface driver/level2 driver/level3 driver/others) -if (NOT ${DYNAMIC_ARCH}) +if (NOT DYNAMIC_ARCH) list(APPEND BLASDIRS kernel) endif () diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake index 2e5ce5edc..aaa3da7bc 100644 --- a/cmake/c_check.cmake +++ b/cmake/c_check.cmake @@ -26,8 +26,8 @@ # N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables. -# TODO: detect NEED_FU/FU -set(NEED_FU 1) +# TODO: detect FU (front underscore) by compiling ctest1.c +set(FU "_") # Convert CMake vars into the format that OpenBLAS expects string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS) @@ -74,5 +74,5 @@ file(WRITE ${TARGET_CONF} "#define ARCH_${ARCH}\t1\n" "#define C_${COMPILER_ID}\t1\n" "#define __${BINARY}BIT__\t1\n" - "#define FUNDERSCORE\t${NEED_FU}\n") + "#define FUNDERSCORE\t${FU}\n") diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 6312a515e..32faeeea7 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -54,7 +54,6 @@ include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake") include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") # compile getarch -enable_language(ASM) set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") file(MAKE_DIRECTORY ${GETARCH_DIR}) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt new file mode 100644 index 000000000..a36cb8332 --- /dev/null +++ b/kernel/CMakeLists.txt @@ -0,0 +1,85 @@ + +include_directories(${CMAKE_SOURCE_DIR}) + +# TODO: need to read ${KERNELDIR}/KERNEL into CMake vars + + +# Makefile.L1 + +# these are using hardcoded filenames for now, should get them from the KERNEL vars, e.g. 
DAMAXKERNEL instead of amax.S +GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "USE_ABS" "amax_k" 0) +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "USE_ABS;USE_MIN" "amin_k" 0) +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "" "max_k" 0) +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "" "min_k" 0) +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "USE_ABS" "i*amax_k" 0) +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "USE_ABS;USE_MIN" "i*amin_k" 0) +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "" "i*max_k" 0) +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "" "i*min_k" 0) +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +#DBLASOBJS += \ +# dasum_k$(TSUFFIX).$(SUFFIX) daxpy_k$(TSUFFIX).$(SUFFIX) dcopy_k$(TSUFFIX).$(SUFFIX) ddot_k$(TSUFFIX).$(SUFFIX) \ +# dnrm2_k$(TSUFFIX).$(SUFFIX) drot_k$(TSUFFIX).$(SUFFIX) dscal_k$(TSUFFIX).$(SUFFIX) dswap_k$(TSUFFIX).$(SUFFIX) \ +# daxpby_k$(TSUFFIX).$(SUFFIX) + +# Makefile.L2 +#DBLASOBJS += \ +# dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX) dsymv_U$(TSUFFIX).$(SUFFIX) dsymv_L$(TSUFFIX).$(SUFFIX) \ +# dger_k$(TSUFFIX).$(SUFFIX) + + +# Makefile.L3 +#DKERNELOBJS += \ +# dgemm_kernel$(TSUFFIX).$(SUFFIX) \ +# $(DGEMMINCOPYOBJ) $(DGEMMITCOPYOBJ) \ +# $(DGEMMONCOPYOBJ) $(DGEMMOTCOPYOBJ) + +#DBLASOBJS += \ +# dgemm_beta$(TSUFFIX).$(SUFFIX) \ +# dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ +# dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ +# dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ +# dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ + +#DBLASOBJS += \ +# 
dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ +# dtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) dtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ +# dtrmm_iutucopy$(TSUFFIX).$(SUFFIX) dtrmm_iutncopy$(TSUFFIX).$(SUFFIX) \ +# dtrmm_iltucopy$(TSUFFIX).$(SUFFIX) dtrmm_iltncopy$(TSUFFIX).$(SUFFIX) \ +# dtrmm_ounucopy$(TSUFFIX).$(SUFFIX) dtrmm_ounncopy$(TSUFFIX).$(SUFFIX) \ +# dtrmm_olnucopy$(TSUFFIX).$(SUFFIX) dtrmm_olnncopy$(TSUFFIX).$(SUFFIX) \ +# dtrmm_outucopy$(TSUFFIX).$(SUFFIX) dtrmm_outncopy$(TSUFFIX).$(SUFFIX) \ +# dtrmm_oltucopy$(TSUFFIX).$(SUFFIX) dtrmm_oltncopy$(TSUFFIX).$(SUFFIX) \ +# dtrsm_iunucopy$(TSUFFIX).$(SUFFIX) dtrsm_iunncopy$(TSUFFIX).$(SUFFIX) \ +# dtrsm_ilnucopy$(TSUFFIX).$(SUFFIX) dtrsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ +# dtrsm_iutucopy$(TSUFFIX).$(SUFFIX) dtrsm_iutncopy$(TSUFFIX).$(SUFFIX) \ +# dtrsm_iltucopy$(TSUFFIX).$(SUFFIX) dtrsm_iltncopy$(TSUFFIX).$(SUFFIX) \ +# dtrsm_ounucopy$(TSUFFIX).$(SUFFIX) dtrsm_ounncopy$(TSUFFIX).$(SUFFIX) \ +# dtrsm_olnucopy$(TSUFFIX).$(SUFFIX) dtrsm_olnncopy$(TSUFFIX).$(SUFFIX) \ +# dtrsm_outucopy$(TSUFFIX).$(SUFFIX) dtrsm_outncopy$(TSUFFIX).$(SUFFIX) \ +# dtrsm_oltucopy$(TSUFFIX).$(SUFFIX) dtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ +# dsymm_iutcopy$(TSUFFIX).$(SUFFIX) dsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ +# dsymm_outcopy$(TSUFFIX).$(SUFFIX) dsymm_oltcopy$(TSUFFIX).$(SUFFIX) + +#DBLASOBJS += \ +# domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ +# domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) + +# Makefile.LA +#DBLASOBJS += dneg_tcopy$(TSUFFIX).$(SUFFIX) dlaswp_ncopy$(TSUFFIX).$(SUFFIX) + +set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS From 13d2d48e67d5d76714690e4fb6ab46e5a678431a Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Fri, 6 Feb 2015 13:42:20 -0600 Subject: [PATCH 053/137] Added yet another naming scheme for lapack functions. 
--- cmake/utils.cmake | 15 ++++++++++++++- lapack/CMakeLists.txt | 24 +++++++++++++++++++++--- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 286f271e2..641e7a7c6 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -136,6 +136,7 @@ endfunction () # If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU. # If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU. # If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects). +# If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel # @param alternate_name replaces the source name as the object name (define codes are still appended) function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_type_in all_defines_in replace_scheme) @@ -184,7 +185,19 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_ string(SUBSTRING ${define_code} 0 1 define_code_first) string(SUBSTRING ${define_code} 1 -1 define_code_second) set(replace_code "${define_code_first}${last_letter}${define_code_second}") - else () + elseif (replace_scheme EQUAL 4) + # insert code before the last underscore and pass that in as the alternate_name + get_filename_component(alternate_name ${source_file} NAME_WE) + set(extra_underscore "") + # check if filename has two underscores, insert another if not (e.g. 
getrs_parallel needs to become getrs_U_parallel not getrsU_parallel) + string(REGEX MATCH "_[a-zA-Z]+_" underscores ${alternate_name}) + string(LENGTH "${underscores}" underscores) + if (underscores EQUAL 0) + set(extra_underscore "_") + endif () + string(REGEX REPLACE "(.+)(_[^_]+)$" "\\1${extra_underscore}${define_code}\\2" alternate_name ${alternate_name}) + message(STATUS ${alternate_name}) + else() set(append_code ${define_code}) # replace_scheme should be 0 endif () endif () diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index c6d051c76..664ce6d6e 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -4,12 +4,10 @@ include_directories(${CMAKE_SOURCE_DIR}) # TODO: laswp needs arch specific code # TODO: getrs needs to be compiled with and without TRANS (and up to TRANS=4 in the complex case) # TODO: trti2 needs to be compiled with and without UNIT -# TODO: trtri needs to be compiled with and without UNIT set(LAPACK_SOURCES getf2/getf2_k.c getrf/getrf_single.c - getrs/getrs_single.c potrf/potrf_U_single.c potrf/potrf_L_single.c potf2/potf2_U.c @@ -20,6 +18,15 @@ set(LAPACK_SOURCES lauum/lauum_L_single.c trti2/trti2_U.c trti2/trti2_L.c +) + +# sources that need TRANS set +set(TRANS_SOURCES + getrs/getrs_single.c +) + +# sources that need UNIT set +set(UNIT_SOURCES trtri/trtri_U_single.c trtri/trtri_L_single.c ) @@ -55,11 +62,17 @@ if (SMP) set(PARALLEL_SOURCES ${GETRF_SRC} - getrs/getrs_parallel.c potrf/potrf_U_parallel.c potrf/potrf_L_parallel.c lauum/lauum_U_parallel.c lauum/lauum_L_parallel.c + ) + + list(APPEND TRANS_SOURCES + getrs/getrs_parallel.c + ) + + list(APPEND UNIT_SOURCES trtri/trtri_U_parallel.c trtri/trtri_L_parallel.c ) @@ -79,5 +92,10 @@ if (SMP) list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endif () +GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "DOUBLE" "" 4) +list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) +GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "DOUBLE" "" 4) +list(APPEND DBLAS_OBJS 
${COMBO_OBJ_LIST_OUT}) + set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS From 6b5d26e07b017f75a803ff64ac12d700fc298478 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Fri, 6 Feb 2015 16:52:19 -0600 Subject: [PATCH 054/137] Added SMP sources to level2 CMakeLists.txt. --- cmake/utils.cmake | 17 ++++++++- driver/level2/CMakeLists.txt | 71 ++++++++++++++++++++---------------- 2 files changed, 55 insertions(+), 33 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 641e7a7c6..2faa2e3e0 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -70,7 +70,21 @@ endfunction () # e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax" # @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU) # @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters) -function(GenerateNamedObjects sources_in float_type_in defines_in name_in use_cblas) +function(GenerateNamedObjects sources_in float_type_in) + + if (DEFINED ARGV2) + set(defines_in ${ARGV2}) + endif () + + if (DEFINED ARGV3) + set(name_in ${ARGV3}) + endif () + + if (DEFINED ARGV4) + set(use_cblas ${ARGV4}) + else () + set(use_cblas 0) + endif () if (DEFINED ARGV5) set(replace_last_with ${ARGV5}) @@ -196,7 +210,6 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_ set(extra_underscore "_") endif () string(REGEX REPLACE "(.+)(_[^_]+)$" "\\1${extra_underscore}${define_code}\\2" alternate_name ${alternate_name}) - message(STATUS ${alternate_name}) else() set(append_code ${define_code}) # replace_scheme should be 0 endif () diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index d596668c4..a01b37289 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -11,7 +11,7 @@ set(UL_SOURCES syr2_k.c ) -# sources that need to be 
compiled several times, for UNIT, TRANS +# sources that need to be compiled several times, for UNIT, TRANSA set(NU_SOURCES tbmv_U.c tbsv_U.c @@ -31,43 +31,52 @@ set(NU_SOURCES GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "DOUBLE" "" 1) list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) -# objects that need TRANS and UNIT set +# objects that need TRANSA and UNIT set # N.B. BLAS wants to put the U/L from the filename in the *MIDDLE* because of course why not have a different naming scheme for every single object -hpa -GenerateCombinationObjects("${NU_SOURCES}" "TRANS;UNIT" "N;N" "DOUBLE" "" 3) +GenerateCombinationObjects("${NU_SOURCES}" "TRANSA;UNIT" "N;N" "DOUBLE" "" 3) list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) # gbmv uses a lowercase n and t. WHY? WHO KNOWS! -GenerateNamedObjects("gbmv_k.c" "DOUBLE" "" "gbmv_n" 0) +GenerateNamedObjects("gbmv_k.c" "DOUBLE" "" "gbmv_n") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("gbmv_k.c" "DOUBLE" "TRANS" "gbmv_t" 0) +GenerateNamedObjects("gbmv_k.c" "DOUBLE" "TRANS" "gbmv_t") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -#if (DEFINED SMP) -# add_library(DBLASOBJS_SMP -# dgemv_thread_n.c dgemv_thread_t.c -# dger_thread.c -# dsymv_thread_U.c dsymv_thread_L.c -# dsyr_thread_U.c dsyr_thread_L.c -# dsyr2_thread_U.c dsyr2_thread_L.c -# dspr_thread_U.c dspr_thread_L.c -# dspr2_thread_U.c dspr2_thread_L.c -# dtrmv_thread_NUU.c dtrmv_thread_NUN.c -# dtrmv_thread_NLU.c dtrmv_thread_NLN.c -# dtrmv_thread_TUU.c dtrmv_thread_TUN.c -# dtrmv_thread_TLU.c dtrmv_thread_TLN.c -# dspmv_thread_U.c dspmv_thread_L.c -# dtpmv_thread_NUU.c dtpmv_thread_NUN.c -# dtpmv_thread_NLU.c dtpmv_thread_NLN.c -# dtpmv_thread_TUU.c dtpmv_thread_TUN.c -# dtpmv_thread_TLU.c dtpmv_thread_TLN.c -# dgbmv_thread_n.c dgbmv_thread_t.c -# dsbmv_thread_U.c dsbmv_thread_L.c -# dtbmv_thread_NUU.c dtbmv_thread_NUN.c -# dtbmv_thread_NLU.c dtbmv_thread_NLN.c -# dtbmv_thread_TUU.c dtbmv_thread_TUN.c -# dtbmv_thread_TLU.c dtbmv_thread_TLN.c -# ) -#endif () +if 
(SMP) + + # gbmv uses a lowercase n and t + GenerateNamedObjects("gbmv_thread.c" "DOUBLE" "" "gbmv_thread_n") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("gbmv_thread.c" "DOUBLE" "TRANS" "gbmv_thread_t") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + + GenerateNamedObjects("ger_thread.c" "DOUBLE") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + + set(UL_SMP_SOURCES + symv_thread.c + syr_thread.c + syr2_thread.c + spr_thread.c + spr2_thread.c + spmv_thread.c + sbmv_thread.c + ) + + GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "DOUBLE" "" 2) + list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) + + + set(NU_SMP_SOURCES + trmv_thread.c + tpmv_thread.c + tbmv_thread.c + ) + + GenerateCombinationObjects("${NU_SMP_SOURCES}" "TRANSA;LOWER;UNIT" "N;U;N" "DOUBLE" "" 2) + list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) + +endif () set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS From 38681fb1c678cdc4b7d8dfac309f4468d3a4c051 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Sat, 7 Feb 2015 12:54:30 -0600 Subject: [PATCH 055/137] Added more kernel files. --- kernel/CMakeLists.txt | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index a36cb8332..6f226b78b 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -7,28 +7,31 @@ include_directories(${CMAKE_SOURCE_DIR}) # Makefile.L1 # these are using hardcoded filenames for now, should get them from the KERNEL vars, e.g. 
DAMAXKERNEL instead of amax.S -GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "USE_ABS" "amax_k" 0) +GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "USE_ABS" "amax_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "USE_ABS;USE_MIN" "amin_k" 0) +GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "USE_ABS;USE_MIN" "amin_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "" "max_k" 0) +GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "" "max_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "" "min_k" 0) +GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "" "min_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "USE_ABS" "i*amax_k" 0) +GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "USE_ABS" "i*amax_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "USE_ABS;USE_MIN" "i*amin_k" 0) +GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "USE_ABS;USE_MIN" "i*amin_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "" "i*max_k" 0) +GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "" "i*max_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "" "i*min_k" 0) +GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "" "i*min_k") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/scal.S" "DOUBLE" "" "scal_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) #DBLASOBJS += \ @@ -37,6 +40,16 @@ list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # daxpby_k$(TSUFFIX).$(SUFFIX) # Makefile.L2 + +GenerateNamedObjects("${KERNELDIR}/gemv_n.S" "DOUBLE") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/gemv_t.S" "DOUBLE" "TRANS") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + 
+GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "DOUBLE" "" 1) +list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) + #DBLASOBJS += \ # dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX) dsymv_U$(TSUFFIX).$(SUFFIX) dsymv_L$(TSUFFIX).$(SUFFIX) \ # dger_k$(TSUFFIX).$(SUFFIX) From 2f59135eb6aee046324e91d5a71d338051f8fd9a Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Sat, 7 Feb 2015 21:15:21 -0600 Subject: [PATCH 056/137] Added gemv to level2 CMakeLists.txt. --- driver/level2/CMakeLists.txt | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index a01b37289..cb8b1c949 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -44,10 +44,15 @@ list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) if (SMP) - # gbmv uses a lowercase n and t + # gbmv uses a lowercase n and t. N.B. this uses TRANSA where gbmv.c uses TRANS. Intentional? GenerateNamedObjects("gbmv_thread.c" "DOUBLE" "" "gbmv_thread_n") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("gbmv_thread.c" "DOUBLE" "TRANS" "gbmv_thread_t") + GenerateNamedObjects("gbmv_thread.c" "DOUBLE" "TRANSA" "gbmv_thread_t") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + + GenerateNamedObjects("gemv_thread.c" "DOUBLE" "" "gemv_thread_n") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("gemv_thread.c" "DOUBLE" "TRANSA" "gemv_thread_t") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("ger_thread.c" "DOUBLE") From fa0e6a6c937a61df610ce47d863b244d96b4c068 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Sat, 7 Feb 2015 21:37:46 -0600 Subject: [PATCH 057/137] Added the rest of the L1 kernel makefile. 
--- kernel/CMakeLists.txt | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 6f226b78b..5615bf2c2 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -31,13 +31,32 @@ list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "" "i*min_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("${KERNELDIR}/asum.S" "DOUBLE" "" "asum_k") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/axpy.S" "DOUBLE" "" "axpy_k") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/copy.S" "DOUBLE" "C_INTERFACE" "copy_k") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/dot.S" "DOUBLE" "" "dot_k") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/nrm2.S" "DOUBLE" "" "nrm2_k") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("${KERNELDIR}/rot.S" "DOUBLE" "" "rot_k") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + GenerateNamedObjects("${KERNELDIR}/scal.S" "DOUBLE" "" "scal_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -#DBLASOBJS += \ -# dasum_k$(TSUFFIX).$(SUFFIX) daxpy_k$(TSUFFIX).$(SUFFIX) dcopy_k$(TSUFFIX).$(SUFFIX) ddot_k$(TSUFFIX).$(SUFFIX) \ -# dnrm2_k$(TSUFFIX).$(SUFFIX) drot_k$(TSUFFIX).$(SUFFIX) dscal_k$(TSUFFIX).$(SUFFIX) dswap_k$(TSUFFIX).$(SUFFIX) \ -# daxpby_k$(TSUFFIX).$(SUFFIX) +GenerateNamedObjects("${KERNELDIR}/swap.S" "DOUBLE" "" "swap_k") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("arm/axpby.c" "DOUBLE" "" "axpby_k") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # Makefile.L2 @@ -50,10 +69,8 @@ list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "DOUBLE" "" 1) list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) -#DBLASOBJS += \ -# dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX) dsymv_U$(TSUFFIX).$(SUFFIX) 
dsymv_L$(TSUFFIX).$(SUFFIX) \ -# dger_k$(TSUFFIX).$(SUFFIX) - +GenerateNamedObjects("generic/ger.c" "DOUBLE" "" "ger_k") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # Makefile.L3 #DKERNELOBJS += \ From 7fa5c4e2fd383e75f4e0c74dba0de2efc2b66378 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Sun, 8 Feb 2015 15:29:18 -0600 Subject: [PATCH 058/137] Fixed some case issues with ARCH. Added some kernel and driver/others objects. --- cmake/c_check.cmake | 14 ++++++++++---- driver/others/CMakeLists.txt | 22 +++++++++++++--------- kernel/CMakeLists.txt | 8 ++++++++ 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake index aaa3da7bc..e32c18a43 100644 --- a/cmake/c_check.cmake +++ b/cmake/c_check.cmake @@ -56,12 +56,16 @@ endif () # TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check set(ARCH ${CMAKE_SYSTEM_PROCESSOR}) if (${ARCH} STREQUAL "AMD64") - set(ARCH "X86_64") + set(ARCH "x86_64") endif () # If you are using a 32-bit compiler on a 64-bit system CMAKE_SYSTEM_PROCESSOR will be wrong -if (${ARCH} STREQUAL "X86_64" AND BINARY EQUAL 32) - set(ARCH X86) +if (${ARCH} STREQUAL "x86_64" AND BINARY EQUAL 32) + set(ARCH x86) +endif () + +if (${ARCH} STREQUAL "X86") + set(ARCH x86) endif () set(COMPILER_ID ${CMAKE_CXX_COMPILER_ID}) @@ -69,9 +73,11 @@ if (${COMPILER_ID} STREQUAL "GNU") set(COMPILER_ID "GCC") endif () +string(TOUPPER ${ARCH} UC_ARCH) + file(WRITE ${TARGET_CONF} "#define OS_${HOST_OS}\t1\n" - "#define ARCH_${ARCH}\t1\n" + "#define ARCH_${UC_ARCH}\t1\n" "#define C_${COMPILER_ID}\t1\n" "#define __${BINARY}BIT__\t1\n" "#define FUNDERSCORE\t${FU}\n") diff --git a/driver/others/CMakeLists.txt b/driver/others/CMakeLists.txt index 0d2a2f486..e14a916b2 100644 --- a/driver/others/CMakeLists.txt +++ b/driver/others/CMakeLists.txt @@ -29,9 +29,7 @@ if (SMP) endif () endif () -add_library(COMMON_OBJS OBJECT - ${MEMORY} - ${SMP_SOURCES} +set(COMMON_SOURCES xerbla.c abs.c # 
TODO: this is split into c_abs (DOUBLE unset) and z_abs (DOUBLE set) in the Makefile openblas_set_num_threads.c @@ -40,12 +38,12 @@ add_library(COMMON_OBJS OBJECT openblas_error_handle.c ) -#ifeq ($(DYNAMIC_ARCH), 1) -#COMMONOBJS += dynamic.$(SUFFIX) -#else -#COMMONOBJS += parameter.$(SUFFIX) -#endif -# +if (DYNAMIC_ARCH) + list(APPEND COMMON_SOURCES dynamic.c) +else () + list(APPEND COMMON_SOURCES parameter.c) +endif () + #ifdef EXPRECISION #COMMONOBJS += x_abs.$(SUFFIX) qlamch.$(SUFFIX) qlamc3.$(SUFFIX) #endif @@ -62,6 +60,12 @@ add_library(COMMON_OBJS OBJECT #COMMONOBJS += profile.$(SUFFIX) #endif +add_library(COMMON_OBJS OBJECT + ${MEMORY} + ${SMP_SOURCES} + ${COMMON_SOURCES} +) + list(APPEND DBLAS_OBJS "COMMON_OBJS") #LIBOTHERS = libothers.$(LIBSUFFIX) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 5615bf2c2..a6eb14e73 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -3,6 +3,14 @@ include_directories(${CMAKE_SOURCE_DIR}) # TODO: need to read ${KERNELDIR}/KERNEL into CMake vars +# Makeflie + +message(STATUS "${ARCH}") +if (${ARCH} STREQUAL "x86") + GenerateNamedObjects("${KERNELDIR}/cpuid.S" "") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +endif () + # Makefile.L1 From 4c65afcce175bd93041b85f1bd1cfa128ade12e2 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Mon, 9 Feb 2015 09:52:14 -0600 Subject: [PATCH 059/137] Changed kernel filenames to vars. These will need to be read from KERNEL. Added some kernel/L3 objects. 
--- kernel/CMakeLists.txt | 105 +++++++++++++++++++++++++++++------------- 1 file changed, 74 insertions(+), 31 deletions(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index a6eb14e73..31839b054 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -1,11 +1,8 @@ include_directories(${CMAKE_SOURCE_DIR}) -# TODO: need to read ${KERNELDIR}/KERNEL into CMake vars - # Makeflie -message(STATUS "${ARCH}") if (${ARCH} STREQUAL "x86") GenerateNamedObjects("${KERNELDIR}/cpuid.S" "") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) @@ -14,56 +11,74 @@ endif () # Makefile.L1 -# these are using hardcoded filenames for now, should get them from the KERNEL vars, e.g. DAMAXKERNEL instead of amax.S -GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "USE_ABS" "amax_k") +# TODO: need to read ${KERNELDIR}/KERNEL into CMake vars +set(DAMAXKERNEL amax.S) +set(DAMINKERNEL amax.S) +set(DMAXKERNEL amax.S) +set(DMINKERNEL amax.S) +set(IDAMAXKERNEL iamax.S) +set(IDAMINKERNEL iamax.S) +set(IDMAXKERNEL iamax.S) +set(IDMINKERNEL iamax.S) +set(DASUMKERNEL asum.S) +set(DAXPYKERNEL axpy.S) +set(DCOPYKERNEL copy.S) +set(DDOTKERNEL dot.S) +set(DNRM2KERNEL nrm2.S) +set(DROTKERNEL rot.S) +set(DSCALKERNEL scal.S) +set(DSWAPKERNEL swap.S) +set(DAXPBYKERNEL ../arm/axpby.c) + +GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "DOUBLE" "USE_ABS" "amax_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "USE_ABS;USE_MIN" "amin_k") +GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "DOUBLE" "USE_ABS;USE_MIN" "amin_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "" "max_k") +GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "DOUBLE" "" "max_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/amax.S" "DOUBLE" "" "min_k") +GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "DOUBLE" "" "min_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) 
-GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "USE_ABS" "i*amax_k") +GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "DOUBLE" "USE_ABS" "i*amax_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "USE_ABS;USE_MIN" "i*amin_k") +GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "DOUBLE" "USE_ABS;USE_MIN" "i*amin_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "" "i*max_k") +GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "DOUBLE" "" "i*max_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/iamax.S" "DOUBLE" "" "i*min_k") +GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "DOUBLE" "" "i*min_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/asum.S" "DOUBLE" "" "asum_k") +GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "DOUBLE" "" "asum_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/axpy.S" "DOUBLE" "" "axpy_k") +GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "DOUBLE" "" "axpy_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/copy.S" "DOUBLE" "C_INTERFACE" "copy_k") +GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "DOUBLE" "C_INTERFACE" "copy_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/dot.S" "DOUBLE" "" "dot_k") +GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "DOUBLE" "" "dot_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/nrm2.S" "DOUBLE" "" "nrm2_k") +GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "DOUBLE" "" "nrm2_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/rot.S" "DOUBLE" "" "rot_k") +GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "DOUBLE" "" "rot_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/scal.S" "DOUBLE" "" "scal_k") +GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" 
"DOUBLE" "" "scal_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("${KERNELDIR}/swap.S" "DOUBLE" "" "swap_k") +GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "DOUBLE" "" "swap_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -GenerateNamedObjects("arm/axpby.c" "DOUBLE" "" "axpby_k") +GenerateNamedObjects("${KERNELDIR}/${DAXPBYKERNEL}" "DOUBLE" "" "axpby_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # Makefile.L2 @@ -81,17 +96,45 @@ GenerateNamedObjects("generic/ger.c" "DOUBLE" "" "ger_k") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # Makefile.L3 -#DKERNELOBJS += \ -# dgemm_kernel$(TSUFFIX).$(SUFFIX) \ -# $(DGEMMINCOPYOBJ) $(DGEMMITCOPYOBJ) \ -# $(DGEMMONCOPYOBJ) $(DGEMMOTCOPYOBJ) -#DBLASOBJS += \ -# dgemm_beta$(TSUFFIX).$(SUFFIX) \ -# dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ -# dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ -# dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ -# dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ +set(DGEMM_BETA ../generic/gemm_beta.c) +set(DGEMMKERNEL gemm_kernel_2x4_penryn.S) + +GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +if (DGEMMINCOPY) + GenerateNamedObjects(${DGEMMINCOPY} "DOUBLE") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +endif () + +if (DGEMMITCOPY) + GenerateNamedObjects(${DGEMMITCOPY} "DOUBLE") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +endif () + +if (DGEMMONCOPY) + GenerateNamedObjects(${DGEMMONCOPY} "DOUBLE") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +endif () + +if (DGEMMOTCOPY) + GenerateNamedObjects(${DGEMMOTCOPY} "DOUBLE") + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +endif () + +GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "DOUBLE") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateCombinationObjects("${KERNELDIR}/${DGEMMKERNEL}" "LEFT;TRANSA" "R;N" "DOUBLE" "TRMMKERNEL" 2 "trmm_kernel") +list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) 
+GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "LT;TRSMKERNEL" "trsm_kernel_LT") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "RT;TRSMKERNEL" "trsm_kernel_RT") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) #DBLASOBJS += \ # dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ From f992799226ca30c363be60864e6b44df696f51eb Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Mon, 9 Feb 2015 10:47:35 -0600 Subject: [PATCH 060/137] Added the rest of Makefile.L3. --- kernel/CMakeLists.txt | 126 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 104 insertions(+), 22 deletions(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 31839b054..aa38cdc85 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -136,29 +136,111 @@ list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "RT;TRSMKERNEL" "trsm_kernel_RT") list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -#DBLASOBJS += \ -# dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ -# dtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) dtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ -# dtrmm_iutucopy$(TSUFFIX).$(SUFFIX) dtrmm_iutncopy$(TSUFFIX).$(SUFFIX) \ -# dtrmm_iltucopy$(TSUFFIX).$(SUFFIX) dtrmm_iltncopy$(TSUFFIX).$(SUFFIX) \ -# dtrmm_ounucopy$(TSUFFIX).$(SUFFIX) dtrmm_ounncopy$(TSUFFIX).$(SUFFIX) \ -# dtrmm_olnucopy$(TSUFFIX).$(SUFFIX) dtrmm_olnncopy$(TSUFFIX).$(SUFFIX) \ -# dtrmm_outucopy$(TSUFFIX).$(SUFFIX) dtrmm_outncopy$(TSUFFIX).$(SUFFIX) \ -# dtrmm_oltucopy$(TSUFFIX).$(SUFFIX) dtrmm_oltncopy$(TSUFFIX).$(SUFFIX) \ -# dtrsm_iunucopy$(TSUFFIX).$(SUFFIX) dtrsm_iunncopy$(TSUFFIX).$(SUFFIX) \ -# 
dtrsm_ilnucopy$(TSUFFIX).$(SUFFIX) dtrsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ -# dtrsm_iutucopy$(TSUFFIX).$(SUFFIX) dtrsm_iutncopy$(TSUFFIX).$(SUFFIX) \ -# dtrsm_iltucopy$(TSUFFIX).$(SUFFIX) dtrsm_iltncopy$(TSUFFIX).$(SUFFIX) \ -# dtrsm_ounucopy$(TSUFFIX).$(SUFFIX) dtrsm_ounncopy$(TSUFFIX).$(SUFFIX) \ -# dtrsm_olnucopy$(TSUFFIX).$(SUFFIX) dtrsm_olnncopy$(TSUFFIX).$(SUFFIX) \ -# dtrsm_outucopy$(TSUFFIX).$(SUFFIX) dtrsm_outncopy$(TSUFFIX).$(SUFFIX) \ -# dtrsm_oltucopy$(TSUFFIX).$(SUFFIX) dtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ -# dsymm_iutcopy$(TSUFFIX).$(SUFFIX) dsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ -# dsymm_outcopy$(TSUFFIX).$(SUFFIX) dsymm_oltcopy$(TSUFFIX).$(SUFFIX) +# These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define. +# Could simplify it a bit by pairing up by -UUNIT/-DUNIT. +GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trmm_iunucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trmm_iunncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trmm_ounucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trmm_ounncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) -#DBLASOBJS += \ -# domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ -# domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) +GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trmm_ilnucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trmm_ilnncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" 
"OUTER;LOWER;UNIT" "trmm_olnucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trmm_olnncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trmm_iutucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trmm_iutncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trmm_outucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trmm_outncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trmm_iltucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trmm_iltncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trmm_oltucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trmm_oltncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trsm_iunucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trsm_iunncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trsm_ounucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trsm_ounncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" 
"LOWER;UNIT" "trsm_ilnucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trsm_ilnncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trsm_olnucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trsm_olnncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trsm_iutucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trsm_iutncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trsm_outucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trsm_outncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trsm_iltucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trsm_iltncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trsm_oltucopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trsm_oltncopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "OUTER" "symm_outcopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "" "symm_iutcopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "DOUBLE" 
"LOWER;OUTER" "symm_oltcopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "LOWER" "symm_iltcopy") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + +if (NOT DEFINED DOMATCOPY_CN) + set(DOMATCOPY_CN ../arm/omatcopy_cn.c) +endif () +if (NOT DEFINED DOMATCOPY_RN) + set(DOMATCOPY_RN ../arm/omatcopy_rn.c) +endif () +if (NOT DEFINED DOMATCOPY_CT) + set(DOMATCOPY_CT ../arm/omatcopy_ct.c) +endif () +if (NOT DEFINED DOMATCOPY_RT) + set(DOMATCOPY_RT ../arm/omatcopy_rt.c) +endif () + +GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CN}" "DOUBLE" "" "domatcopy_k_cn") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RN}" "DOUBLE" "ROWM" "domatcopy_k_rn") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CT}" "DOUBLE" "" "domatcopy_k_ct") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) +GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RT}" "DOUBLE" "ROWM" "domatcopy_k_rt") +list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # Makefile.LA #DBLASOBJS += dneg_tcopy$(TSUFFIX).$(SUFFIX) dlaswp_ncopy$(TSUFFIX).$(SUFFIX) From e8c39138c64815843db1e0e11b349da060d48b52 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Mon, 9 Feb 2015 12:28:09 -0600 Subject: [PATCH 061/137] Removed return value from GenerateNamedObjects. It sets DBLAS_OBJS directly to save a bunch of list appending in the CMakeLists.txt files. 
--- cmake/utils.cmake | 6 +++-- driver/level2/CMakeLists.txt | 12 --------- driver/level3/CMakeLists.txt | 9 ------- interface/CMakeLists.txt | 13 --------- kernel/CMakeLists.txt | 51 ------------------------------------ lapack/CMakeLists.txt | 4 --- 6 files changed, 4 insertions(+), 91 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 2faa2e3e0..9b5d7de14 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -138,7 +138,9 @@ function(GenerateNamedObjects sources_in float_type_in) list(APPEND OBJ_LIST_OUT ${obj_name}) endforeach () - set(OBJ_LIST_OUT ${OBJ_LIST_OUT} PARENT_SCOPE) + + list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) + set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) endfunction () # generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in @@ -220,6 +222,6 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_ endforeach () endforeach () - set(COMBO_OBJ_LIST_OUT ${COMBO_OBJ_LIST_OUT} PARENT_SCOPE) + set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) endfunction () diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index cb8b1c949..d8f8123d3 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -29,34 +29,25 @@ set(NU_SOURCES # objects that need LOWER set GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "DOUBLE" "" 1) -list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) # objects that need TRANSA and UNIT set # N.B. BLAS wants to put the U/L from the filename in the *MIDDLE* because of course why not have a different naming scheme for every single object -hpa GenerateCombinationObjects("${NU_SOURCES}" "TRANSA;UNIT" "N;N" "DOUBLE" "" 3) -list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) # gbmv uses a lowercase n and t. WHY? WHO KNOWS! 
GenerateNamedObjects("gbmv_k.c" "DOUBLE" "" "gbmv_n") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("gbmv_k.c" "DOUBLE" "TRANS" "gbmv_t") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) if (SMP) # gbmv uses a lowercase n and t. N.B. this uses TRANSA where gbmv.c uses TRANS. Intentional? GenerateNamedObjects("gbmv_thread.c" "DOUBLE" "" "gbmv_thread_n") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("gbmv_thread.c" "DOUBLE" "TRANSA" "gbmv_thread_t") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("gemv_thread.c" "DOUBLE" "" "gemv_thread_n") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("gemv_thread.c" "DOUBLE" "TRANSA" "gemv_thread_t") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("ger_thread.c" "DOUBLE") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) set(UL_SMP_SOURCES symv_thread.c @@ -69,8 +60,6 @@ if (SMP) ) GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "DOUBLE" "" 2) - list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) - set(NU_SMP_SOURCES trmv_thread.c @@ -79,7 +68,6 @@ if (SMP) ) GenerateCombinationObjects("${NU_SMP_SOURCES}" "TRANSA;LOWER;UNIT" "N;U;N" "DOUBLE" "" 2) - list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) endif () diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 687664949..b9a817323 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -27,33 +27,24 @@ set(GEMM_DEFINES NN NT TN TT) foreach (GEMM_DEFINE ${GEMM_DEFINES}) string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) GenerateNamedObjects("gemm.c" "DOUBLE" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) GenerateNamedObjects("gemm.c" "DOUBLE" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endif () endforeach () GenerateCombinationObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "N;L;N" "DOUBLE" "" 0) -list(APPEND 
DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "DOUBLE" "NN" 1) -list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) GenerateCombinationObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "U;N" "DOUBLE" "" 1) -list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "U" "DOUBLE" "" 2) -list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) if (SMP) # N.B. these do NOT have a float type (e.g. DOUBLE) defined! GenerateNamedObjects("gemm_thread_m.c;gemm_thread_n.c;gemm_thread_mn.c;gemm_thread_variable.c;syrk_thread.c" "" "" "" 0) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) if (NOT USE_SIMPLE_THREADED_LEVEL3) GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "DOUBLE" "THREADED_LEVEL3" 2 "syrk_thread") - list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "DOUBLE" "THREADED_LEVEL3;NN" 2 "symm_thread") - list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) endif () endif () diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 90fca8b8f..1b0ac42d6 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -41,34 +41,22 @@ endif () foreach (CBLAS_FLAG ${CBLAS_FLAGS}) GenerateNamedObjects("${BLAS1_SOURCES}" "DOUBLE" "" "" ${CBLAS_FLAG}) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("${BLAS2_SOURCES}" "DOUBLE" "" "" ${CBLAS_FLAG}) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("${BLAS3_SOURCES}" "DOUBLE" "" "" ${CBLAS_FLAG}) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # trmm is trsm with a compiler flag set GenerateNamedObjects("trsm.c" "DOUBLE" "TRMM" "trmm" ${CBLAS_FLAG}) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # max and imax are compiled 4 times GenerateNamedObjects("max.c" "DOUBLE" "" "" ${CBLAS_FLAG}) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("max.c" "DOUBLE" "USE_ABS" "amax" ${CBLAS_FLAG}) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("max.c" 
"DOUBLE" "USE_ABS;USE_MIN" "amin" ${CBLAS_FLAG}) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("max.c" "DOUBLE" "USE_MIN" "min" ${CBLAS_FLAG}) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("imax.c" "DOUBLE" "" "i*max" ${CBLAS_FLAG}) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("imax.c" "DOUBLE" "USE_ABS" "i*amax" ${CBLAS_FLAG}) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("imax.c" "DOUBLE" "USE_ABS;USE_MIN" "i*amin" ${CBLAS_FLAG}) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("imax.c" "DOUBLE" "USE_MIN" "i*min" ${CBLAS_FLAG}) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endforeach () @@ -79,7 +67,6 @@ if (NOT DEFINED NO_LAPACK) lapack/lauum.c lapack/trti2.c lapack/trtri.c ) GenerateNamedObjects("${LAPACK_SOURCES}" "DOUBLE" "" "" 0) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endif () set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index aa38cdc85..329bf5375 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -101,124 +101,77 @@ set(DGEMM_BETA ../generic/gemm_beta.c) set(DGEMMKERNEL gemm_kernel_2x4_penryn.S) GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) if (DGEMMINCOPY) GenerateNamedObjects(${DGEMMINCOPY} "DOUBLE") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endif () if (DGEMMITCOPY) GenerateNamedObjects(${DGEMMITCOPY} "DOUBLE") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endif () if (DGEMMONCOPY) GenerateNamedObjects(${DGEMMONCOPY} "DOUBLE") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endif () if (DGEMMOTCOPY) GenerateNamedObjects(${DGEMMOTCOPY} "DOUBLE") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endif () GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "DOUBLE") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateCombinationObjects("${KERNELDIR}/${DGEMMKERNEL}" "LEFT;TRANSA" "R;N" "DOUBLE" "TRMMKERNEL" 2 "trmm_kernel") 
-list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "LT;TRSMKERNEL" "trsm_kernel_LT") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "RT;TRSMKERNEL" "trsm_kernel_RT") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define. # Could simplify it a bit by pairing up by -UUNIT/-DUNIT. GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trmm_iunucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trmm_iunncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trmm_ounucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trmm_ounncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trmm_ilnucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trmm_ilnncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trmm_olnucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trmm_olnncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) 
GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trmm_iutucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trmm_iutncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trmm_outucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trmm_outncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trmm_iltucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trmm_iltncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trmm_oltucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trmm_oltncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trsm_iunucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trsm_iunncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trsm_ounucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trsm_ounncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trsm_ilnucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trsm_ilnncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) 
GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trsm_olnucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trsm_olnncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trsm_iutucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trsm_iutncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trsm_outucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trsm_outncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trsm_iltucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trsm_iltncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trsm_oltucopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trsm_oltncopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "OUTER" "symm_outcopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "" "symm_iutcopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "LOWER;OUTER" "symm_oltcopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "LOWER" "symm_iltcopy") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) if (NOT DEFINED DOMATCOPY_CN) 
set(DOMATCOPY_CN ../arm/omatcopy_cn.c) @@ -234,13 +187,9 @@ if (NOT DEFINED DOMATCOPY_RT) endif () GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CN}" "DOUBLE" "" "domatcopy_k_cn") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RN}" "DOUBLE" "ROWM" "domatcopy_k_rn") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CT}" "DOUBLE" "" "domatcopy_k_ct") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RT}" "DOUBLE" "ROWM" "domatcopy_k_rt") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # Makefile.LA #DBLASOBJS += dneg_tcopy$(TSUFFIX).$(SUFFIX) dlaswp_ncopy$(TSUFFIX).$(SUFFIX) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index 664ce6d6e..5070e0c05 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -50,7 +50,6 @@ set(ZLAPACK_SOURCES ) GenerateNamedObjects("${LAPACK_SOURCES}" "DOUBLE" "" "" 0) -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) if (SMP) @@ -89,13 +88,10 @@ if (SMP) ) GenerateNamedObjects("${PARALLEL_SOURCES}" "DOUBLE" "" "" 0) - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endif () GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "DOUBLE" "" 4) -list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "DOUBLE" "" 4) -list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS From 4bfaf1ce6612542ce71f668a1901005da18792c1 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Mon, 9 Feb 2015 12:56:55 -0600 Subject: [PATCH 062/137] Removed some list appends I missed. 
--- kernel/CMakeLists.txt | 41 ----------------------------------------- 1 file changed, 41 deletions(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 329bf5375..522fac349 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -5,7 +5,6 @@ include_directories(${CMAKE_SOURCE_DIR}) if (${ARCH} STREQUAL "x86") GenerateNamedObjects("${KERNELDIR}/cpuid.S" "") - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) endif () @@ -31,69 +30,29 @@ set(DSWAPKERNEL swap.S) set(DAXPBYKERNEL ../arm/axpby.c) GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "DOUBLE" "USE_ABS" "amax_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "DOUBLE" "USE_ABS;USE_MIN" "amin_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "DOUBLE" "" "max_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "DOUBLE" "" "min_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "DOUBLE" "USE_ABS" "i*amax_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "DOUBLE" "USE_ABS;USE_MIN" "i*amin_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "DOUBLE" "" "i*max_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "DOUBLE" "" "i*min_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "DOUBLE" "" "asum_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "DOUBLE" "" "axpy_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "DOUBLE" "C_INTERFACE" "copy_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "DOUBLE" "" "dot_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - 
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "DOUBLE" "" "nrm2_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "DOUBLE" "" "rot_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "DOUBLE" "" "scal_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "DOUBLE" "" "swap_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/${DAXPBYKERNEL}" "DOUBLE" "" "axpby_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # Makefile.L2 GenerateNamedObjects("${KERNELDIR}/gemv_n.S" "DOUBLE") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateNamedObjects("${KERNELDIR}/gemv_t.S" "DOUBLE" "TRANS") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "DOUBLE" "" 1) -list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT}) - GenerateNamedObjects("generic/ger.c" "DOUBLE" "" "ger_k") -list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) # Makefile.L3 From c0624a26befe30f506a89b39dd5fc2ace726d3a0 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Mon, 9 Feb 2015 14:34:29 -0600 Subject: [PATCH 063/137] Fixed some dgemm_copy function names. 
--- kernel/CMakeLists.txt | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 522fac349..8bc325f17 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -56,25 +56,34 @@ GenerateNamedObjects("generic/ger.c" "DOUBLE" "" "ger_k") # Makefile.L3 +# TODO: these are from KERNEL.PENRYN - they should be read in from the appropriate ${KERNELDIR}/KERNEL file set(DGEMM_BETA ../generic/gemm_beta.c) set(DGEMMKERNEL gemm_kernel_2x4_penryn.S) +set(DGEMMINCOPY gemm_ncopy_2.S) +set(DGEMMITCOPY gemm_tcopy_2.S) +set(DGEMMONCOPY ../generic/gemm_ncopy_4.c) +set(DGEMMOTCOPY ../generic/gemm_tcopy_4.c) +set(DGEMMINCOPYOBJ gemm_incopy) +set(DGEMMITCOPYOBJ gemm_itcopy) +set(DGEMMONCOPYOBJ gemm_oncopy) +set(DGEMMOTCOPYOBJ gemm_otcopy) -GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE") +GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "" "gemm_kernel") if (DGEMMINCOPY) - GenerateNamedObjects(${DGEMMINCOPY} "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "" "${DGEMMINCOPYOBJ}") endif () if (DGEMMITCOPY) - GenerateNamedObjects(${DGEMMITCOPY} "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "" "${DGEMMITCOPYOBJ}") endif () if (DGEMMONCOPY) - GenerateNamedObjects(${DGEMMONCOPY} "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "" "${DGEMMONCOPYOBJ}") endif () if (DGEMMOTCOPY) - GenerateNamedObjects(${DGEMMOTCOPY} "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "" "${DGEMMOTCOPYOBJ}") endif () GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "DOUBLE") From 6ddbfea7004ab3c16e35491c2f8ac066e09ffa7a Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Mon, 9 Feb 2015 15:15:58 -0600 Subject: [PATCH 064/137] Added generic laswp object. 
--- lapack/CMakeLists.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index 5070e0c05..7f37d49b3 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -1,7 +1,6 @@ include_directories(${CMAKE_SOURCE_DIR}) -# TODO: laswp needs arch specific code # TODO: getrs needs to be compiled with and without TRANS (and up to TRANS=4 in the complex case) # TODO: trti2 needs to be compiled with and without UNIT @@ -49,7 +48,11 @@ set(ZLAPACK_SOURCES trtri/trtri_L_single.c ) -GenerateNamedObjects("${LAPACK_SOURCES}" "DOUBLE" "" "" 0) +GenerateNamedObjects("${LAPACK_SOURCES}" "DOUBLE") + +# TODO: laswp needs arch specific code +GenerateNamedObjects("laswp/generic/laswp_k.c" "DOUBLE" "" "laswp_plus") +GenerateNamedObjects("laswp/generic/laswp_k.c" "DOUBLE" "MINUS" "laswp_minus") if (SMP) From 3b20b62423150ea0eb2a4e2c9d296a527c30e827 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Mon, 9 Feb 2015 15:29:28 -0600 Subject: [PATCH 065/137] Fixed trti2 name. 
--- lapack/CMakeLists.txt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index 7f37d49b3..346f96e34 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -1,8 +1,6 @@ include_directories(${CMAKE_SOURCE_DIR}) -# TODO: getrs needs to be compiled with and without TRANS (and up to TRANS=4 in the complex case) -# TODO: trti2 needs to be compiled with and without UNIT set(LAPACK_SOURCES getf2/getf2_k.c @@ -15,8 +13,6 @@ set(LAPACK_SOURCES lauu2/lauu2_L.c lauum/lauum_U_single.c lauum/lauum_L_single.c - trti2/trti2_U.c - trti2/trti2_L.c ) # sources that need TRANS set @@ -30,6 +26,12 @@ set(UNIT_SOURCES trtri/trtri_L_single.c ) +set(UNIT_SOURCES2 + trti2/trti2_U.c + trti2/trti2_L.c +) + +# TODO: getrs needs to be compiled with up to TRANS=4 in the complex case set(ZLAPACK_SOURCES getf2/zgetf2_k.c getrf/getrf_single.c @@ -95,6 +97,7 @@ endif () GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "DOUBLE" "" 4) GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "DOUBLE" "" 4) +GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "DOUBLE" "" 0) set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS From 96cf6779ca9f28490affebf3a74ccf90cc8e8246 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 10 Feb 2015 11:01:01 -0600 Subject: [PATCH 066/137] Added DLA sources from lapack-netlib. Can't use the lapack-netlib cmake files, since they are designed to build a complete lapack/blas library. They have their own fortran detection and flag setup and so on. Instead I'll just recreate the makefiles I need. Fixed a typo in the NAME defines. 
--- CMakeLists.txt | 13 +++++++++++++ cmake/utils.cmake | 2 +- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 41cb52b23..b8e1d6ad1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -66,6 +66,19 @@ foreach (DBLAS_OBJ ${DBLAS_OBJS}) set_target_properties(${DBLAS_OBJ} PROPERTIES COMPILE_DEFINITIONS "${PREV_DEFS};DOUBLE") list(APPEND TARGET_OBJS "$") endforeach () + +# netlib: + +# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. + +# N.B. if a source is added or removed you MUST re-run the cmake command manually; make will not do it. +file(GLOB DLA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/d*.f") + +add_library(DLA_OBJ OBJECT ${DLA_SOURCES}) +set_target_properties(${DLA_OBJ} PROPERTIES COMPILE_FLAGS ${LAPACK_FFLAGS}) +list(APPEND TARGET_OBJS "$") + +# add objects to the openblas lib add_library(openblas ${TARGET_OBJS}) # TODO: Why is the config saved here? Is this necessary with CMake? diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 9b5d7de14..a6952c833 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -128,7 +128,7 @@ function(GenerateNamedObjects sources_in float_type_in) list(APPEND obj_defines "CBLAS") endif () - list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"") + list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"") list(APPEND obj_defines ${defines_in}) list(APPEND obj_defines ${float_type_in}) From 8743093bd7b6e87c27564c5ca01c77ee0a655aa8 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 10 Feb 2015 11:47:46 -0600 Subject: [PATCH 067/137] Added aux files from lapack-netlib. 
--- CMakeLists.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index b8e1d6ad1..d82ef61ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,6 +71,16 @@ endforeach () # Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. +# aux files +set(ALLAUX + ${NETLIB_LAPACK_DIR}/SRC/ilaenv.f ${NETLIB_LAPACK_DIR}/SRC/ieeeck.f ${NETLIB_LAPACK_DIR}/SRC/lsamen.f ${NETLIB_LAPACK_DIR}/SRC/xerbla_array.f ${NETLIB_LAPACK_DIR}/SRC/iparmq.f + ${NETLIB_LAPACK_DIR}/SRC/ilaprec.f ${NETLIB_LAPACK_DIR}/SRC/ilatrans.f ${NETLIB_LAPACK_DIR}/SRC/ilauplo.f ${NETLIB_LAPACK_DIR}/SRC/iladiag.f ${NETLIB_LAPACK_DIR}/SRC/chla_transtype.f + ${NETLIB_LAPACK_DIR}/INSTALL/ilaver.f ${NETLIB_LAPACK_DIR}/INSTALL/slamch.f +) +add_library(AUX_OBJ OBJECT ${ALLAUX}) +set_target_properties(${AUX_OBJ} PROPERTIES COMPILE_FLAGS ${LAPACK_FFLAGS}) +list(APPEND TARGET_OBJS "$") + # N.B. if a source is added or removed you MUST re-run the cmake command manually; make will not do it. file(GLOB DLA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/d*.f") From 162791e30e5c725360d5c0de4c5c38f7643d4055 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 10 Feb 2015 12:42:05 -0600 Subject: [PATCH 068/137] Added common objects from kernel Makefile. 
--- cmake/utils.cmake | 4 +++- kernel/CMakeLists.txt | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index a6952c833..672dcad33 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -130,7 +130,9 @@ function(GenerateNamedObjects sources_in float_type_in) list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"") list(APPEND obj_defines ${defines_in}) - list(APPEND obj_defines ${float_type_in}) + if (NOT ${float_type_in} STREQUAL "SINGLE") + list(APPEND obj_defines ${float_type_in}) + endif () add_library(${obj_name} OBJECT ${source_file}) set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${obj_defines}") diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 8bc325f17..a83bd0dbe 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -7,6 +7,14 @@ if (${ARCH} STREQUAL "x86") GenerateNamedObjects("${KERNELDIR}/cpuid.S" "") endif () +# TODO: Read from ${KERNELDIR}/KERNEL - some architectures use a different lsame +set(LSAME_KERNEL lsame.S) +set(SCABS_KERNEL cabs.S) +set(DCABS_KERNEL cabs.S) +GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "" "F_INTERFACE" "lsame") +GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "SINGLE" "COMPLEX;F_INTERFACE" "cabs1") +GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE" "COMPLEX;F_INTERFACE" "cabs1") + # Makefile.L1 From 64b5a0ef84c5ec1f0b7a7bf083a1b20dd015ed77 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 10 Feb 2015 14:29:05 -0600 Subject: [PATCH 069/137] Added AUX files from lapack-netlib. 
--- CMakeLists.txt | 24 +++-------- cmake/fc.cmake | 7 ++++ cmake/lapack.cmake | 100 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 18 deletions(-) create mode 100644 cmake/lapack.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index d82ef61ea..9db677d35 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${Open enable_language(Fortran) enable_language(ASM) -message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with.") +message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only DOUBLE and x86 support is currently available.") include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake") include("${CMAKE_SOURCE_DIR}/cmake/system.cmake") @@ -70,23 +70,11 @@ endforeach () # netlib: # Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. - -# aux files -set(ALLAUX - ${NETLIB_LAPACK_DIR}/SRC/ilaenv.f ${NETLIB_LAPACK_DIR}/SRC/ieeeck.f ${NETLIB_LAPACK_DIR}/SRC/lsamen.f ${NETLIB_LAPACK_DIR}/SRC/xerbla_array.f ${NETLIB_LAPACK_DIR}/SRC/iparmq.f - ${NETLIB_LAPACK_DIR}/SRC/ilaprec.f ${NETLIB_LAPACK_DIR}/SRC/ilatrans.f ${NETLIB_LAPACK_DIR}/SRC/ilauplo.f ${NETLIB_LAPACK_DIR}/SRC/iladiag.f ${NETLIB_LAPACK_DIR}/SRC/chla_transtype.f - ${NETLIB_LAPACK_DIR}/INSTALL/ilaver.f ${NETLIB_LAPACK_DIR}/INSTALL/slamch.f -) -add_library(AUX_OBJ OBJECT ${ALLAUX}) -set_target_properties(${AUX_OBJ} PROPERTIES COMPILE_FLAGS ${LAPACK_FFLAGS}) -list(APPEND TARGET_OBJS "$") - -# N.B. if a source is added or removed you MUST re-run the cmake command manually; make will not do it. 
-file(GLOB DLA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/d*.f") - -add_library(DLA_OBJ OBJECT ${DLA_SOURCES}) -set_target_properties(${DLA_OBJ} PROPERTIES COMPILE_FLAGS ${LAPACK_FFLAGS}) -list(APPEND TARGET_OBJS "$") +# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. +include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake") +add_library(LA_OBJ OBJECT ${LA_SOURCES}) +set_target_properties(LA_OBJ PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}") +list(APPEND TARGET_OBJS "$") # add objects to the openblas lib add_library(openblas ${TARGET_OBJS}) diff --git a/cmake/fc.cmake b/cmake/fc.cmake index a47865b63..ba156c210 100644 --- a/cmake/fc.cmake +++ b/cmake/fc.cmake @@ -191,3 +191,10 @@ if (${F_COMPILER} STREQUAL "COMPAQ") endif () endif () +# from the root Makefile - this is for lapack-netlib to compile the correct secnd file. +if (${F_COMPILER} STREQUAL "GFORTRAN") + set(TIMER "INT_ETIME") +else () + set(TIMER "NONE") +endif () + diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake new file mode 100644 index 000000000..14581a9b2 --- /dev/null +++ b/cmake/lapack.cmake @@ -0,0 +1,100 @@ +# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files. 
+ +set(ALLAUX + ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f + ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f + ../INSTALL/ilaver.f ../INSTALL/slamch.f +) + +set(DZLAUX + dbdsdc.f + dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f + dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f + dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f + dlagts.f dlamrg.f dlanst.f + dlapy2.f dlapy3.f dlarnv.f + dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f + dlarrk.f dlarrr.f dlaneg.f + dlartg.f dlaruv.f dlas2.f dlascl.f + dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f + dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f + dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f + dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f + dsteqr.f dsterf.f dlaisnan.f disnan.f + dlartgp.f dlartgs.f + ../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f +) + +set(DLASRC + dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f + dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f + dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f + dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f + dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f + dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f + dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f + dgetc2.f dgetri.f + dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f + dggglm.f dgghrd.f dgglse.f dggqrf.f + dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f + dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f + dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f + dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f + dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f + dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f + dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f + dlapll.f dlapmt.f + dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f + dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f + 
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f + dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f + dlargv.f dlarrv.f dlartv.f + dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f + dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f + dopgtr.f dopmtr.f dorg2l.f dorg2r.f + dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f + dorgrq.f dorgtr.f dorm2l.f dorm2r.f + dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f + dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f + dpbstf.f dpbsv.f dpbsvx.f + dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f + dposvx.f dpotrs.f dpstrf.f dpstf2.f + dppcon.f dppequ.f + dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f + dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f + dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f + dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f + dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f + dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f + dstevx.f + dsycon.f dsyev.f dsyevd.f dsyevr.f + dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f + dsysv.f dsysvx.f + dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f + dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f + dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f + dsytri_rook.f dsycon_rook.f dsysv_rook.f + dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f + dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f + dtptrs.f + dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f + dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f + dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f + dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f + dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f + dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f + dgeequb.f dsyequb.f dpoequb.f dgbequb.f + dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f + dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f + dgeqrt.f 
dgeqrt2.f dgeqrt3.f dgemqrt.f + dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f +) + +set(LA_REL_SRC ${ALLAUX} ${DZLAUX} ${DLASRC}) + +# add lapack-netlib folder to the sources +set(LA_SOURCES "") +foreach (LA_FILE ${LA_REL_SRC}) + list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}") +endforeach () + From d60b49e5c564ec7e9d1a159af86833730a27c9c7 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 10 Feb 2015 14:36:43 -0600 Subject: [PATCH 070/137] Turned off uninitialized variable warning when compiling lapack-netlib. --- cmake/system.cmake | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cmake/system.cmake b/cmake/system.cmake index 2a0678f83..3d58fa2e3 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -342,6 +342,11 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") endforeach () endif () +if (${F_COMPILER} STREQUAL "GFORTRAN") + # lapack-netlib is rife with uninitialized warnings -hpa + set(LAPACK_FFLAGS "${LAPACK_FFLAGS} -Wno-maybe-uninitialized") +endif () + set(LAPACK_CFLAGS "${CMAKE_C_CFLAGS} -DHAVE_LAPACK_CONFIG_H") if (INTERFACE64) set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_ILP64") From c94fe71278ad31606a84699f33832779a38f2520 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 11 Feb 2015 10:54:14 -0600 Subject: [PATCH 071/137] Removed incoming-stack-boundary for MSVC. Made float type optional for GenerateNamedObjects. Called GenerateNamedObjects for a couple of driver/others files that needed NAME/CNAME set. 
--- cmake/os.cmake | 4 +++- cmake/utils.cmake | 8 ++++++-- driver/others/CMakeLists.txt | 8 +++++--- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/cmake/os.cmake b/cmake/os.cmake index cf36ef62f..eb7df31ed 100644 --- a/cmake/os.cmake +++ b/cmake/os.cmake @@ -57,7 +57,9 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") # Ensure the correct stack alignment on Win32 # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97 if (${ARCH} STREQUAL "x86") - set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2") + if (NOT MSVC) + set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2") + endif () set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2") endif () diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 672dcad33..715f298b7 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -70,7 +70,11 @@ endfunction () # e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax" # @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU) # @param append_with appends the filename with this string (e.g. 
trmm_R should be trmm_RTUU or some other combination of characters) -function(GenerateNamedObjects sources_in float_type_in) +function(GenerateNamedObjects sources_in) + + if (DEFINED ARGV1) + set(float_type_in ${ARGV1}) + endif () if (DEFINED ARGV2) set(defines_in ${ARGV2}) @@ -97,7 +101,7 @@ function(GenerateNamedObjects sources_in float_type_in) set(OBJ_LIST_OUT "") foreach (source_file ${sources_in}) - if (NOT float_type_in STREQUAL "") + if (DEFINED float_type_in AND NOT float_type_in STREQUAL "") string(SUBSTRING ${float_type_in} 0 1 float_char) string(TOLOWER ${float_char} float_char) endif () diff --git a/driver/others/CMakeLists.txt b/driver/others/CMakeLists.txt index e14a916b2..7f8672eb2 100644 --- a/driver/others/CMakeLists.txt +++ b/driver/others/CMakeLists.txt @@ -31,13 +31,15 @@ endif () set(COMMON_SOURCES xerbla.c - abs.c # TODO: this is split into c_abs (DOUBLE unset) and z_abs (DOUBLE set) in the Makefile openblas_set_num_threads.c - openblas_get_config.c - openblas_get_parallel.c openblas_error_handle.c ) +# these need to have NAME/CNAME set, so use GenerateNamedObjects +GenerateNamedObjects("abs.c" "" "" "c_abs") +GenerateNamedObjects("abs.c" "" "DOUBLE" "z_abs") +GenerateNamedObjects("openblas_get_config.c;openblas_get_parallel.c") + if (DYNAMIC_ARCH) list(APPEND COMMON_SOURCES dynamic.c) else () From 5d3fc092e9eae1982ae9947066546131c57f2fbe Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 11 Feb 2015 11:10:45 -0600 Subject: [PATCH 072/137] Added MSVC defines to common.h. Don't have unistd.h in MSVC. Changed YIELDING to use the YieldProcessor macro. 
--- common.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/common.h b/common.h index fe2083469..0761f5536 100644 --- a/common.h +++ b/common.h @@ -82,7 +82,10 @@ extern "C" { #include #include #include + +#if !defined(_MSC_VER) #include +#endif #ifdef OS_LINUX #include @@ -307,8 +310,12 @@ typedef int blasint; #endif #if defined(OS_WINDOWS) +#ifdef _MSC_VER +#define YIELDING YieldProcessor() +#else #define YIELDING SwitchToThread() #endif +#endif #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5) #define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n"); From a0d9a7fd833f5c5e14b2b30d9518f6a70c40c3f3 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 11 Feb 2015 11:11:47 -0600 Subject: [PATCH 073/137] Changed _Complex types in common_level1.h to use the typedef. --- common_level1.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/common_level1.h b/common_level1.h index 2a1b4f1cf..32ffd6f18 100644 --- a/common_level1.h +++ b/common_level1.h @@ -47,12 +47,12 @@ double dsdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG); double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG); xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); -float _Complex cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); -float _Complex cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); -double _Complex zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); -double _Complex zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); -xdouble _Complex xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); -xdouble _Complex xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); +openblas_complex_float cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); +openblas_complex_float cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); +openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); +openblas_complex_double zdotu_k 
(BLASLONG, double *, BLASLONG, double *, BLASLONG); +openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); +openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); From 056ba2675556722bb5180f5b4cab93559267b516 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 11 Feb 2015 11:13:17 -0600 Subject: [PATCH 074/137] Changed a number of inline calls to use __inline. MSVC doesn't implement C99, so can't use the inline keyword. __inline appears to work in MSVC and GCC. --- common_x86.h | 2 +- driver/level3/syr2k_k.c | 2 +- driver/level3/syrk_k.c | 2 +- lapack/getrf/getrf_parallel.c | 2 +- symcopy.h | 32 ++++++++++++++++---------------- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/common_x86.h b/common_x86.h index 9d82090cc..f096e9074 100644 --- a/common_x86.h +++ b/common_x86.h @@ -100,7 +100,7 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); #define WHEREAMI -static inline int WhereAmI(void){ +static __inline int WhereAmI(void){ int eax, ebx, ecx, edx; int apicid; diff --git a/driver/level3/syr2k_k.c b/driver/level3/syr2k_k.c index 8df0f122f..09131fbdb 100644 --- a/driver/level3/syr2k_k.c +++ b/driver/level3/syr2k_k.c @@ -47,7 +47,7 @@ #endif #endif -static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { +static __inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { BLASLONG i; diff --git a/driver/level3/syrk_k.c b/driver/level3/syrk_k.c index 08751dc8b..8bc817f87 100644 --- a/driver/level3/syrk_k.c +++ b/driver/level3/syrk_k.c @@ -49,7 +49,7 @@ #endif #endif -static inline int syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { +static __inline int 
syrk_beta(BLASLONG m_from, BLASLONG m_to, BLASLONG n_from, BLASLONG n_to, FLOAT *alpha, FLOAT *c, BLASLONG ldc) { BLASLONG i; diff --git a/lapack/getrf/getrf_parallel.c b/lapack/getrf/getrf_parallel.c index a76be3ba7..b4f33583f 100644 --- a/lapack/getrf/getrf_parallel.c +++ b/lapack/getrf/getrf_parallel.c @@ -67,7 +67,7 @@ double sqrt(double); #undef GETRF_FACTOR #define GETRF_FACTOR 1.00 -static inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) { +static __inline BLASLONG FORMULA1(BLASLONG M, BLASLONG N, BLASLONG IS, BLASLONG BK, BLASLONG T) { double m = (double)(M - IS - BK); double n = (double)(N - IS - BK); diff --git a/symcopy.h b/symcopy.h index 48ccbd369..16172c046 100644 --- a/symcopy.h +++ b/symcopy.h @@ -43,7 +43,7 @@ #if !defined(XDOUBLE) || !defined(QUAD_PRECISION) -static inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -141,7 +141,7 @@ static inline void SYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -232,7 +232,7 @@ static inline void SYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } -static inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -362,7 +362,7 @@ static inline void ZSYMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -486,7 +486,7 @@ static inline void ZSYMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } 
-static inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -613,7 +613,7 @@ static inline void ZHEMCOPY_L(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -735,7 +735,7 @@ static inline void ZHEMCOPY_U(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } -static inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -862,7 +862,7 @@ static inline void ZHEMCOPY_M(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -984,7 +984,7 @@ static inline void ZHEMCOPY_V(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } -static inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1082,7 +1082,7 @@ static inline void TRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1180,7 +1180,7 @@ static inline void TRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1270,7 +1270,7 @@ static inline void 
TRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1360,7 +1360,7 @@ static inline void TRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1490,7 +1490,7 @@ static inline void ZTRMCOPY_NL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1620,7 +1620,7 @@ static inline void ZTRMCOPY_TL(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; @@ -1744,7 +1744,7 @@ static inline void ZTRMCOPY_NU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ } } -static inline void ZTRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ +static __inline void ZTRMCOPY_TU(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b){ BLASLONG is, js; FLOAT *aa1, *aa2; From e74462a3f5d83853e39ac78404ba579fc9d54ea0 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 11 Feb 2015 11:16:57 -0600 Subject: [PATCH 075/137] Moved declarations to start of functions to satisfy MSVC C89 implementation. 
--- driver/level3/trmm_R.c | 5 +++-- driver/level3/trsm_L.c | 5 +++-- driver/level3/trsm_R.c | 5 +++-- interface/gemm.c | 14 ++++++++++---- interface/gemv.c | 12 +++++++++--- lapack/getrf/getrf_parallel.c | 4 ++++ 6 files changed, 32 insertions(+), 13 deletions(-) diff --git a/driver/level3/trmm_R.c b/driver/level3/trmm_R.c index bdd9370cd..0882aa496 100644 --- a/driver/level3/trmm_R.c +++ b/driver/level3/trmm_R.c @@ -70,6 +70,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG ls, is, js; BLASLONG min_l, min_i, min_j; BLASLONG jjs, min_jj; +#if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA))) + BLASLONG start_ls; +#endif m = args -> m; n = args -> n; @@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO } #else - BLASLONG start_ls; - for(js = n; js > 0; js -= GEMM_R){ min_j = js; if (min_j > GEMM_R) min_j = GEMM_R; diff --git a/driver/level3/trsm_L.c b/driver/level3/trsm_L.c index 78da0eb6c..d8130ee7e 100644 --- a/driver/level3/trsm_L.c +++ b/driver/level3/trsm_L.c @@ -76,6 +76,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG ls, is, js; BLASLONG min_l, min_i, min_j; BLASLONG jjs, min_jj; +#if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA))) + BLASLONG start_is; +#endif m = args -> m; n = args -> n; @@ -178,8 +181,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO } } #else - BLASLONG start_is; - for(ls = m; ls > 0; ls -= GEMM_Q){ min_l = ls; if (min_l > GEMM_Q) min_l = GEMM_Q; diff --git a/driver/level3/trsm_R.c b/driver/level3/trsm_R.c index 169441d1e..f6a57f93f 100644 --- a/driver/level3/trsm_R.c +++ b/driver/level3/trsm_R.c @@ -75,6 +75,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG ls, is, js; BLASLONG min_l, min_i, min_j; BLASLONG jjs, min_jj; +#if !((defined(UPPER) && 
!defined(TRANSA)) || (!defined(UPPER) && defined(TRANSA))) + BLASLONG start_ls; +#endif m = args -> m; n = args -> n; @@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO } #else - BLASLONG start_ls; - for(js = n; js > 0; js -= GEMM_R){ min_j = js; if (min_j > GEMM_R) min_j = GEMM_R; diff --git a/interface/gemm.c b/interface/gemm.c index a5a2b4724..7253b0500 100644 --- a/interface/gemm.c +++ b/interface/gemm.c @@ -121,6 +121,9 @@ void NAME(char *TRANSA, char *TRANSB, FLOAT *sa, *sb; #ifdef SMP + int nthreads_max; + int nthreads_avail; + double MNK; #ifndef COMPLEX #ifdef XDOUBLE int mode = BLAS_XDOUBLE | BLAS_REAL; @@ -237,6 +240,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS XFLOAT *sa, *sb; #ifdef SMP + int nthreads_max; + int nthreads_avail; + double MNK; #ifndef COMPLEX #ifdef XDOUBLE int mode = BLAS_XDOUBLE | BLAS_REAL; @@ -400,15 +406,15 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS mode |= (transa << BLAS_TRANSA_SHIFT); mode |= (transb << BLAS_TRANSB_SHIFT); - int nthreads_max = num_cpu_avail(3); - int nthreads_avail = nthreads_max; + nthreads_max = num_cpu_avail(3); + nthreads_avail = nthreads_max; #ifndef COMPLEX - double MNK = (double) args.m * (double) args.n * (double) args.k; + MNK = (double) args.m * (double) args.n * (double) args.k; if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) nthreads_max = 1; #else - double MNK = (double) args.m * (double) args.n * (double) args.k; + MNK = (double) args.m * (double) args.n * (double) args.k; if ( MNK <= (8192.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) nthreads_max = 1; #endif diff --git a/interface/gemv.c b/interface/gemv.c index 2dd82dce5..638329a2c 100644 --- a/interface/gemv.c +++ b/interface/gemv.c @@ -80,6 +80,9 @@ void NAME(char *TRANS, blasint *M, blasint *N, FLOAT *buffer; #ifdef SMP int nthreads; + int nthreads_max; + int nthreads_avail; + double MNK; #endif int 
(*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { @@ -134,6 +137,9 @@ void CNAME(enum CBLAS_ORDER order, blasint info, t; #ifdef SMP int nthreads; + int nthreads_max; + int nthreads_avail; + double MNK; #endif int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { @@ -212,10 +218,10 @@ void CNAME(enum CBLAS_ORDER order, #ifdef SMP - int nthreads_max = num_cpu_avail(2); - int nthreads_avail = nthreads_max; + nthreads_max = num_cpu_avail(2); + nthreads_avail = nthreads_max; - double MNK = (double) m * (double) n; + MNK = (double) m * (double) n; if ( MNK <= (24.0 * 24.0 * (double) (GEMM_MULTITHREAD_THRESHOLD*GEMM_MULTITHREAD_THRESHOLD) ) ) nthreads_max = 1; diff --git a/lapack/getrf/getrf_parallel.c b/lapack/getrf/getrf_parallel.c index b4f33583f..8fdf76987 100644 --- a/lapack/getrf/getrf_parallel.c +++ b/lapack/getrf/getrf_parallel.c @@ -373,7 +373,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, BLASLONG num_cpu; +#ifdef _MSC_VER + BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE]; +#else volatile BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128))); +#endif #ifndef COMPLEX #ifdef XDOUBLE From 4662a0b13aad3783645cbaa5f8035701b921d61f Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Sun, 15 Feb 2015 17:44:37 -0600 Subject: [PATCH 076/137] Changed generate functions to iterate through a list of float types. This will generate obj files for SINGLE/DOUBLE/COMPLEX/DOUBLE COMPLEX. 
--- CMakeLists.txt | 36 ++++++++- cmake/os.cmake | 2 +- cmake/system.cmake | 6 +- cmake/utils.cmake | 115 +++++++++++++++------------ driver/level2/CMakeLists.txt | 22 +++--- driver/level3/CMakeLists.txt | 18 ++--- driver/others/CMakeLists.txt | 8 +- interface/CMakeLists.txt | 26 +++--- kernel/CMakeLists.txt | 149 ++++++++++++++++++----------------- lapack/CMakeLists.txt | 14 ++-- 10 files changed, 222 insertions(+), 174 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9db677d35..5dd811959 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${Open enable_language(Fortran) enable_language(ASM) -message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only DOUBLE and x86 support is currently available.") +message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.") include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake") include("${CMAKE_SOURCE_DIR}/cmake/system.cmake") @@ -36,6 +36,36 @@ if (NOT NO_LAPACK) list(APPEND SUBDIRS lapack) endif () +# set which float types we want to build for +if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16) + # if none are defined, build for all + set(BUILD_SINGLE true) + set(BUILD_DOUBLE true) + set(BUILD_COMPLEX true) + set(BUILD_COMPLEX16 true) +endif () + +set(FLOAT_TYPES "") +if (BUILD_SINGLE) + message(STATUS "Building Single Precision") + list(APPEND FLOAT_TYPES "SINGLE") # defines nothing +endif () + +if (BUILD_DOUBLE) + message(STATUS "Building Double Precision") + list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE +endif () + +if (BUILD_COMPLEX) + message(STATUS "Building Complex Precision") + list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX +endif () + +if (BUILD_COMPLEX16) + message(STATUS "Building 
Double Complex Precision") + list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE +endif () + set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench) # all :: libs netlib tests shared @@ -62,8 +92,8 @@ endforeach () # get obj vars into format that add_library likes: $ (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html) set(TARGET_OBJS "") foreach (DBLAS_OBJ ${DBLAS_OBJS}) - get_target_property(PREV_DEFS ${DBLAS_OBJ} COMPILE_DEFINITIONS) - set_target_properties(${DBLAS_OBJ} PROPERTIES COMPILE_DEFINITIONS "${PREV_DEFS};DOUBLE") + #get_target_property(PREV_DEFS ${DBLAS_OBJ} COMPILE_DEFINITIONS) + #set_target_properties(${DBLAS_OBJ} PROPERTIES COMPILE_DEFINITIONS "${PREV_DEFS};DOUBLE") list(APPEND TARGET_OBJS "$") endforeach () diff --git a/cmake/os.cmake b/cmake/os.cmake index eb7df31ed..f5a75027c 100644 --- a/cmake/os.cmake +++ b/cmake/os.cmake @@ -57,7 +57,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") # Ensure the correct stack alignment on Win32 # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97 if (${ARCH} STREQUAL "x86") - if (NOT MSVC) + if (NOT MSVC AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang") set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2") endif () set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2") diff --git a/cmake/system.cmake b/cmake/system.cmake index 3d58fa2e3..cc7373e47 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -32,7 +32,7 @@ if (DEFINED TARGET) set(GETARCH_FLAGS "-DFORCE_${TARGET}") endif () -if (${INTERFACE64}) +if (INTERFACE64) message(STATUS "Using 64-bit integers.") set(GETARCH_FLAGS "${GETARCH_FLAGS} -DUSE64BITINT") endif () @@ -43,12 +43,12 @@ endif () message(STATUS "GEMM multithread threshold set to ${GEMM_MULTITHREAD_THRESHOLD}.") set(GETARCH_FLAGS "${GETARCH_FLAGS} -DGEMM_MULTITHREAD_THRESHOLD=${GEMM_MULTITHREAD_THRESHOLD}") -if (${NO_AVX}) +if (NO_AVX) message(STATUS "Disabling Advanced Vector Extensions (AVX).") 
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX") endif () -if (${NO_AVX2}) +if (NO_AVX2) message(STATUS "Disabling Advanced Vector Extensions 2 (AVX2).") set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2") endif () diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 715f298b7..81083a19f 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -64,85 +64,98 @@ endfunction () # generates object files for each of the sources, using the BLAS naming scheme to pass the funciton name as a preprocessor definition # @param sources_in the source files to build from -# @param float_type_in the float type to define for this build (e.g. SINGLE/DOUBLE/etc) # @param defines_in (optional) preprocessor definitions that will be applied to all objects # @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended. # e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax" # @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU) # @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters) +# @param no_float_type turns off the float type define for this build (e.g. 
SINGLE/DOUBLE/etc) function(GenerateNamedObjects sources_in) if (DEFINED ARGV1) - set(float_type_in ${ARGV1}) + set(defines_in ${ARGV1}) endif () if (DEFINED ARGV2) - set(defines_in ${ARGV2}) + set(name_in ${ARGV2}) endif () if (DEFINED ARGV3) - set(name_in ${ARGV3}) - endif () - - if (DEFINED ARGV4) - set(use_cblas ${ARGV4}) + set(use_cblas ${ARGV3}) else () set(use_cblas 0) endif () + if (DEFINED ARGV4) + set(replace_last_with ${ARGV4}) + endif () + if (DEFINED ARGV5) - set(replace_last_with ${ARGV5}) + set(append_with ${ARGV5}) endif () if (DEFINED ARGV6) - set(append_with ${ARGV6}) + set(no_float_type ${ARGV6}) + else () + set(no_float_type false) + endif () + + if (no_float_type) + set(float_list "DUMMY") # still need to loop once + else () + set(float_list "${FLOAT_TYPES}") endif () set(OBJ_LIST_OUT "") - foreach (source_file ${sources_in}) + foreach (float_type ${float_list}) + foreach (source_file ${sources_in}) - if (DEFINED float_type_in AND NOT float_type_in STREQUAL "") - string(SUBSTRING ${float_type_in} 0 1 float_char) - string(TOLOWER ${float_char} float_char) - endif () - - if (NOT name_in) - get_filename_component(source_name ${source_file} NAME_WE) - set(obj_name "${float_char}${source_name}") - else () - # replace * with float_char - if (${name_in} MATCHES "\\*") - string(REPLACE "*" ${float_char} obj_name ${name_in}) - else () - set(obj_name "${float_char}${name_in}") + if (NOT no_float_type) + string(SUBSTRING ${float_type} 0 1 float_char) + string(TOLOWER ${float_char} float_char) endif () - endif () - if (replace_last_with) - string(REGEX REPLACE ".$" ${replace_last_with} obj_name ${obj_name}) - else () - set(obj_name "${obj_name}${append_with}") - endif () + if (NOT name_in) + get_filename_component(source_name ${source_file} NAME_WE) + set(obj_name "${float_char}${source_name}") + else () + # replace * with float_char + if (${name_in} MATCHES "\\*") + string(REPLACE "*" ${float_char} obj_name ${name_in}) + else () + set(obj_name 
"${float_char}${name_in}") + endif () + endif () - # now add the object and set the defines - set(obj_defines ${defines_in}) + if (replace_last_with) + string(REGEX REPLACE ".$" ${replace_last_with} obj_name ${obj_name}) + else () + set(obj_name "${obj_name}${append_with}") + endif () - if (use_cblas) - set(obj_name "cblas_${obj_name}") - list(APPEND obj_defines "CBLAS") - endif () + # now add the object and set the defines + set(obj_defines ${defines_in}) - list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"") - list(APPEND obj_defines ${defines_in}) - if (NOT ${float_type_in} STREQUAL "SINGLE") - list(APPEND obj_defines ${float_type_in}) - endif () + if (use_cblas) + set(obj_name "cblas_${obj_name}") + list(APPEND obj_defines "CBLAS") + endif () - add_library(${obj_name} OBJECT ${source_file}) - set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${obj_defines}") + list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"") + list(APPEND obj_defines ${defines_in}) + if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX") + list(APPEND obj_defines "DOUBLE") + endif () + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + list(APPEND obj_defines "COMPLEX") + endif () - list(APPEND OBJ_LIST_OUT ${obj_name}) + add_library(${obj_name} OBJECT ${source_file}) + set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${obj_defines}") + list(APPEND OBJ_LIST_OUT ${obj_name}) + + endforeach () endforeach () list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) @@ -152,7 +165,6 @@ endfunction () # generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in # @param sources_in the source files to build from # @param 
defines_in the preprocessor definitions that will be combined to create the object files -# @param float_type_in the float type to define for this build (e.g. SINGLE/DOUBLE/etc) # @param all_defines_in (optional) preprocessor definitions that will be applied to all objects # @param replace_scheme If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k.c with TRANS and UNIT defined will be symm_TU. # If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU. @@ -160,10 +172,15 @@ endfunction () # If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects). # If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel # @param alternate_name replaces the source name as the object name (define codes are still appended) -function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_type_in all_defines_in replace_scheme) +# @param no_float_type turns off the float type define for this build (e.g. 
SINGLE/DOUBLE/etc) +function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme) + + if (DEFINED ARGV5) + set(alternate_name ${ARGV5}) + endif () if (DEFINED ARGV6) - set(alternate_name ${ARGV6}) + set(no_float_type ${ARGV6}) endif () AllCombinations("${defines_in}" "${absent_codes_in}") @@ -223,7 +240,7 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_ endif () endif () - GenerateNamedObjects("${source_file}" "${float_type_in}" "${cur_defines}" "${alternate_name}" 0 "${replace_code}" "${append_code}") + GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" 0 "${replace_code}" "${append_code}" "${no_float_type}") list(APPEND COMBO_OBJ_LIST_OUT "${OBJ_LIST_OUT}") endforeach () endforeach () diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index d8f8123d3..4524ad688 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -28,26 +28,26 @@ set(NU_SOURCES ) # objects that need LOWER set -GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "DOUBLE" "" 1) +GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1) # objects that need TRANSA and UNIT set # N.B. BLAS wants to put the U/L from the filename in the *MIDDLE* because of course why not have a different naming scheme for every single object -hpa -GenerateCombinationObjects("${NU_SOURCES}" "TRANSA;UNIT" "N;N" "DOUBLE" "" 3) +GenerateCombinationObjects("${NU_SOURCES}" "TRANSA;UNIT" "N;N" "" 3) # gbmv uses a lowercase n and t. WHY? WHO KNOWS! -GenerateNamedObjects("gbmv_k.c" "DOUBLE" "" "gbmv_n") -GenerateNamedObjects("gbmv_k.c" "DOUBLE" "TRANS" "gbmv_t") +GenerateNamedObjects("gbmv_k.c" "" "gbmv_n") +GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t") if (SMP) # gbmv uses a lowercase n and t. N.B. this uses TRANSA where gbmv.c uses TRANS. Intentional? 
- GenerateNamedObjects("gbmv_thread.c" "DOUBLE" "" "gbmv_thread_n") - GenerateNamedObjects("gbmv_thread.c" "DOUBLE" "TRANSA" "gbmv_thread_t") + GenerateNamedObjects("gbmv_thread.c" "" "gbmv_thread_n") + GenerateNamedObjects("gbmv_thread.c" "TRANSA" "gbmv_thread_t") - GenerateNamedObjects("gemv_thread.c" "DOUBLE" "" "gemv_thread_n") - GenerateNamedObjects("gemv_thread.c" "DOUBLE" "TRANSA" "gemv_thread_t") + GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n") + GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t") - GenerateNamedObjects("ger_thread.c" "DOUBLE") + GenerateNamedObjects("ger_thread.c") set(UL_SMP_SOURCES symv_thread.c @@ -59,7 +59,7 @@ if (SMP) sbmv_thread.c ) - GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "DOUBLE" "" 2) + GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2) set(NU_SMP_SOURCES trmv_thread.c @@ -67,7 +67,7 @@ if (SMP) tbmv_thread.c ) - GenerateCombinationObjects("${NU_SMP_SOURCES}" "TRANSA;LOWER;UNIT" "N;U;N" "DOUBLE" "" 2) + GenerateCombinationObjects("${NU_SMP_SOURCES}" "TRANSA;LOWER;UNIT" "N;U;N" "" 2) endif () diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index b9a817323..7259a87e7 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -26,25 +26,25 @@ endif () set(GEMM_DEFINES NN NT TN TT) foreach (GEMM_DEFINE ${GEMM_DEFINES}) string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) - GenerateNamedObjects("gemm.c" "DOUBLE" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0) + GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0) if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) - GenerateNamedObjects("gemm.c" "DOUBLE" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0) + GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0) endif () endforeach () -GenerateCombinationObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "N;L;N" "DOUBLE" "" 0) 
-GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "DOUBLE" "NN" 1) -GenerateCombinationObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "U;N" "DOUBLE" "" 1) -GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "U" "DOUBLE" "" 2) +GenerateCombinationObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "N;L;N" "" 0) +GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "NN" 1) +GenerateCombinationObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "U;N" "" 1) +GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "U" "" 2) if (SMP) # N.B. these do NOT have a float type (e.g. DOUBLE) defined! - GenerateNamedObjects("gemm_thread_m.c;gemm_thread_n.c;gemm_thread_mn.c;gemm_thread_variable.c;syrk_thread.c" "" "" "" 0) + GenerateNamedObjects("gemm_thread_m.c;gemm_thread_n.c;gemm_thread_mn.c;gemm_thread_variable.c;syrk_thread.c" "" "" 0 "" "" 1) if (NOT USE_SIMPLE_THREADED_LEVEL3) - GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "DOUBLE" "THREADED_LEVEL3" 2 "syrk_thread") - GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "DOUBLE" "THREADED_LEVEL3;NN" 2 "symm_thread") + GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "THREADED_LEVEL3" 2 "syrk_thread") + GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "THREADED_LEVEL3;NN" 2 "symm_thread") endif () endif () diff --git a/driver/others/CMakeLists.txt b/driver/others/CMakeLists.txt index 7f8672eb2..3e17ce5be 100644 --- a/driver/others/CMakeLists.txt +++ b/driver/others/CMakeLists.txt @@ -35,10 +35,10 @@ set(COMMON_SOURCES openblas_error_handle.c ) -# these need to have NAME/CNAME set, so use GenerateNamedObjects -GenerateNamedObjects("abs.c" "" "" "c_abs") -GenerateNamedObjects("abs.c" "" "DOUBLE" "z_abs") -GenerateNamedObjects("openblas_get_config.c;openblas_get_parallel.c") +# these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling +GenerateNamedObjects("abs.c" "" "c_abs" 0 "" "" 1 ) 
+GenerateNamedObjects("abs.c" "DOUBLE" "z_abs" 0 "" "" 1) +GenerateNamedObjects("openblas_get_config.c;openblas_get_parallel.c" "" "" 0 "" "" 1) if (DYNAMIC_ARCH) list(APPEND COMMON_SOURCES dynamic.c) diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 1b0ac42d6..739705d17 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -40,23 +40,23 @@ endif () foreach (CBLAS_FLAG ${CBLAS_FLAGS}) - GenerateNamedObjects("${BLAS1_SOURCES}" "DOUBLE" "" "" ${CBLAS_FLAG}) - GenerateNamedObjects("${BLAS2_SOURCES}" "DOUBLE" "" "" ${CBLAS_FLAG}) - GenerateNamedObjects("${BLAS3_SOURCES}" "DOUBLE" "" "" ${CBLAS_FLAG}) + GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG}) + GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG}) + GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG}) # trmm is trsm with a compiler flag set - GenerateNamedObjects("trsm.c" "DOUBLE" "TRMM" "trmm" ${CBLAS_FLAG}) + GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) # max and imax are compiled 4 times - GenerateNamedObjects("max.c" "DOUBLE" "" "" ${CBLAS_FLAG}) - GenerateNamedObjects("max.c" "DOUBLE" "USE_ABS" "amax" ${CBLAS_FLAG}) - GenerateNamedObjects("max.c" "DOUBLE" "USE_ABS;USE_MIN" "amin" ${CBLAS_FLAG}) - GenerateNamedObjects("max.c" "DOUBLE" "USE_MIN" "min" ${CBLAS_FLAG}) + GenerateNamedObjects("max.c" "" "" ${CBLAS_FLAG}) + GenerateNamedObjects("max.c" "USE_ABS" "amax" ${CBLAS_FLAG}) + GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "amin" ${CBLAS_FLAG}) + GenerateNamedObjects("max.c" "USE_MIN" "min" ${CBLAS_FLAG}) - GenerateNamedObjects("imax.c" "DOUBLE" "" "i*max" ${CBLAS_FLAG}) - GenerateNamedObjects("imax.c" "DOUBLE" "USE_ABS" "i*amax" ${CBLAS_FLAG}) - GenerateNamedObjects("imax.c" "DOUBLE" "USE_ABS;USE_MIN" "i*amin" ${CBLAS_FLAG}) - GenerateNamedObjects("imax.c" "DOUBLE" "USE_MIN" "i*min" ${CBLAS_FLAG}) + GenerateNamedObjects("imax.c" "" "i*max" ${CBLAS_FLAG}) + GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" ${CBLAS_FLAG}) + 
GenerateNamedObjects("imax.c" "USE_ABS;USE_MIN" "i*amin" ${CBLAS_FLAG}) + GenerateNamedObjects("imax.c" "USE_MIN" "i*min" ${CBLAS_FLAG}) endforeach () @@ -66,7 +66,7 @@ if (NOT DEFINED NO_LAPACK) lapack/potf2.c lapack/laswp.c lapack/gesv.c lapack/lauu2.c lapack/lauum.c lapack/trti2.c lapack/trtri.c ) - GenerateNamedObjects("${LAPACK_SOURCES}" "DOUBLE" "" "" 0) + GenerateNamedObjects("${LAPACK_SOURCES}") endif () set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index a83bd0dbe..6c259039b 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -11,9 +11,10 @@ endif () set(LSAME_KERNEL lsame.S) set(SCABS_KERNEL cabs.S) set(DCABS_KERNEL cabs.S) -GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "" "F_INTERFACE" "lsame") -GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "SINGLE" "COMPLEX;F_INTERFACE" "cabs1") -GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE" "COMPLEX;F_INTERFACE" "cabs1") +# don't use float type name mangling here +GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "F_INTERFACE" "lsame" 0 "" "" 1) +GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "COMPLEX;F_INTERFACE" "scabs1" "" "" 1) +GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE;COMPLEX;F_INTERFACE" "dcabs1" 0 "" "" 1) # Makefile.L1 @@ -37,30 +38,30 @@ set(DSCALKERNEL scal.S) set(DSWAPKERNEL swap.S) set(DAXPBYKERNEL ../arm/axpby.c) -GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "DOUBLE" "USE_ABS" "amax_k") -GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "DOUBLE" "USE_ABS;USE_MIN" "amin_k") -GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "DOUBLE" "" "max_k") -GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "DOUBLE" "" "min_k") -GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "DOUBLE" "USE_ABS" "i*amax_k") -GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "DOUBLE" "USE_ABS;USE_MIN" "i*amin_k") 
-GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "DOUBLE" "" "i*max_k") -GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "DOUBLE" "" "i*min_k") -GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "DOUBLE" "" "asum_k") -GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "DOUBLE" "" "axpy_k") -GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "DOUBLE" "C_INTERFACE" "copy_k") -GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "DOUBLE" "" "dot_k") -GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "DOUBLE" "" "nrm2_k") -GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "DOUBLE" "" "rot_k") -GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "DOUBLE" "" "scal_k") -GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "DOUBLE" "" "swap_k") -GenerateNamedObjects("${KERNELDIR}/${DAXPBYKERNEL}" "DOUBLE" "" "axpby_k") +GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k") +GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k") +GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k") +GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "" "min_k") +GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k") +GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k") +GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k") +GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "" "i*min_k") +GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k") +GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k") +GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k") +GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k") +GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k") +GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k") +GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k") +GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k") +GenerateNamedObjects("${KERNELDIR}/${DAXPBYKERNEL}" "" "axpby_k") # 
Makefile.L2 GenerateNamedObjects("${KERNELDIR}/gemv_n.S" "DOUBLE") -GenerateNamedObjects("${KERNELDIR}/gemv_t.S" "DOUBLE" "TRANS") -GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "DOUBLE" "" 1) -GenerateNamedObjects("generic/ger.c" "DOUBLE" "" "ger_k") +GenerateNamedObjects("${KERNELDIR}/gemv_t.S" "TRANS") +GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1) +GenerateNamedObjects("generic/ger.c" "" "ger_k") # Makefile.L3 @@ -76,78 +77,78 @@ set(DGEMMITCOPYOBJ gemm_itcopy) set(DGEMMONCOPYOBJ gemm_oncopy) set(DGEMMOTCOPYOBJ gemm_otcopy) -GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "" "gemm_kernel") +GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel") if (DGEMMINCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "" "${DGEMMINCOPYOBJ}") + GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "" "${DGEMMINCOPYOBJ}") endif () if (DGEMMITCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "" "${DGEMMITCOPYOBJ}") + GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "" "${DGEMMITCOPYOBJ}") endif () if (DGEMMONCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "" "${DGEMMONCOPYOBJ}") + GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "" "${DGEMMONCOPYOBJ}") endif () if (DGEMMOTCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "" "${DGEMMOTCOPYOBJ}") + GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "" "${DGEMMOTCOPYOBJ}") endif () -GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "DOUBLE") -GenerateCombinationObjects("${KERNELDIR}/${DGEMMKERNEL}" "LEFT;TRANSA" "R;N" "DOUBLE" "TRMMKERNEL" 2 "trmm_kernel") -GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN") -GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "LT;TRSMKERNEL" "trsm_kernel_LT") -GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN") -GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" 
"DOUBLE" "RT;TRSMKERNEL" "trsm_kernel_RT") +GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}") +GenerateCombinationObjects("${KERNELDIR}/${DGEMMKERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel") +GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN") +GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "LT;TRSMKERNEL" "trsm_kernel_LT") +GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN") +GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "RT;TRSMKERNEL" "trsm_kernel_RT") # These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define. # Could simplify it a bit by pairing up by -UUNIT/-DUNIT. -GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trmm_iunucopy") -GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trmm_iunncopy") -GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trmm_ounucopy") -GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trmm_ounncopy") +GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy") +GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "" "trmm_iunncopy") +GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "OUTER;UNIT" "trmm_ounucopy") +GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "OUTER" "trmm_ounncopy") -GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trmm_ilnucopy") -GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trmm_ilnncopy") -GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trmm_olnucopy") -GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trmm_olnncopy") 
+GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy") +GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy") +GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy") +GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER" "trmm_olnncopy") -GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trmm_iutucopy") -GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trmm_iutncopy") -GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trmm_outucopy") -GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trmm_outncopy") +GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy") +GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "" "trmm_iutncopy") +GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "OUTER;UNIT" "trmm_outucopy") +GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "OUTER" "trmm_outncopy") -GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trmm_iltucopy") -GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trmm_iltncopy") -GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trmm_oltucopy") -GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trmm_oltncopy") +GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy") +GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy") +GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy") +GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER" "trmm_oltncopy") -GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" 
"trsm_iunucopy") -GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trsm_iunncopy") -GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trsm_ounucopy") -GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trsm_ounncopy") +GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy") +GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "" "trsm_iunncopy") +GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "OUTER;UNIT" "trsm_ounucopy") +GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "OUTER" "trsm_ounncopy") -GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trsm_ilnucopy") -GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trsm_ilnncopy") -GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trsm_olnucopy") -GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trsm_olnncopy") +GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy") +GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy") +GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy") +GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER" "trsm_olnncopy") -GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trsm_iutucopy") -GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trsm_iutncopy") -GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trsm_outucopy") -GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trsm_outncopy") +GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy") 
+GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "" "trsm_iutncopy") +GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "OUTER;UNIT" "trsm_outucopy") +GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "OUTER" "trsm_outncopy") -GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trsm_iltucopy") -GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trsm_iltncopy") -GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trsm_oltucopy") -GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trsm_oltncopy") +GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy") +GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy") +GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy") +GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER" "trsm_oltncopy") -GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "OUTER" "symm_outcopy") -GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "" "symm_iutcopy") +GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "OUTER" "symm_outcopy") +GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "" "symm_iutcopy") -GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "LOWER;OUTER" "symm_oltcopy") -GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "LOWER" "symm_iltcopy") +GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy") +GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "LOWER" "symm_iltcopy") if (NOT DEFINED DOMATCOPY_CN) set(DOMATCOPY_CN ../arm/omatcopy_cn.c) @@ -162,10 +163,10 @@ if (NOT DEFINED DOMATCOPY_RT) set(DOMATCOPY_RT ../arm/omatcopy_rt.c) endif () 
-GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CN}" "DOUBLE" "" "domatcopy_k_cn") -GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RN}" "DOUBLE" "ROWM" "domatcopy_k_rn") -GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CT}" "DOUBLE" "" "domatcopy_k_ct") -GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RT}" "DOUBLE" "ROWM" "domatcopy_k_rt") +GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CN}" "" "domatcopy_k_cn") +GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RN}" "ROWM" "domatcopy_k_rn") +GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CT}" "" "domatcopy_k_ct") +GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RT}" "ROWM" "domatcopy_k_rt") # Makefile.LA #DBLASOBJS += dneg_tcopy$(TSUFFIX).$(SUFFIX) dlaswp_ncopy$(TSUFFIX).$(SUFFIX) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index 346f96e34..ed598f22d 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -50,11 +50,11 @@ set(ZLAPACK_SOURCES trtri/trtri_L_single.c ) -GenerateNamedObjects("${LAPACK_SOURCES}" "DOUBLE") +GenerateNamedObjects("${LAPACK_SOURCES}") # TODO: laswp needs arch specific code -GenerateNamedObjects("laswp/generic/laswp_k.c" "DOUBLE" "" "laswp_plus") -GenerateNamedObjects("laswp/generic/laswp_k.c" "DOUBLE" "MINUS" "laswp_minus") +GenerateNamedObjects("laswp/generic/laswp_k.c" "" "laswp_plus") +GenerateNamedObjects("laswp/generic/laswp_k.c" "MINUS" "laswp_minus") if (SMP) @@ -92,12 +92,12 @@ if (SMP) trtri/trtri_L_parallel.c ) - GenerateNamedObjects("${PARALLEL_SOURCES}" "DOUBLE" "" "" 0) + GenerateNamedObjects("${PARALLEL_SOURCES}") endif () -GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "DOUBLE" "" 4) -GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "DOUBLE" "" 4) -GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "DOUBLE" "" 0) +GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4) +GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "" 4) +GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0) set(DBLAS_OBJS ${DBLAS_OBJS} 
PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS From 9eb1499095c6bf523ae4024f3707fd88cad7b131 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 17 Feb 2015 10:30:28 -0600 Subject: [PATCH 077/137] Added another param to GenerateNamedObjects to mangle complex source names. There are a lot of sources for complex float types that are the same names as the real sources, except with z prepended. --- CMakeLists.txt | 2 -- cmake/utils.cmake | 31 +++++++++++++++++++++++++++++++ interface/CMakeLists.txt | 35 +++++++++++++++++++++++++++++------ 3 files changed, 60 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5dd811959..85b20b176 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,8 +92,6 @@ endforeach () # get obj vars into format that add_library likes: $ (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html) set(TARGET_OBJS "") foreach (DBLAS_OBJ ${DBLAS_OBJS}) - #get_target_property(PREV_DEFS ${DBLAS_OBJ} COMPILE_DEFINITIONS) - #set_target_properties(${DBLAS_OBJ} PROPERTIES COMPILE_DEFINITIONS "${PREV_DEFS};DOUBLE") list(APPEND TARGET_OBJS "$") endforeach () diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 81083a19f..aaa669abd 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -70,6 +70,11 @@ endfunction () # @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU) # @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters) # @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc) +# @param complex_only/real_only some routines have separate source files for complex and non-complex float types. 
+# 0 - compiles for all types +# 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE) +# 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX) +# 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c) function(GenerateNamedObjects sources_in) if (DEFINED ARGV1) @@ -100,10 +105,30 @@ function(GenerateNamedObjects sources_in) set(no_float_type false) endif () + set(real_only false) + set(complex_only false) + set(mangle_complex_sources false) + if (DEFINED ARGV7) + if (${ARGV7} EQUAL 1) + set(real_only true) + elseif (${ARGV7} EQUAL 2) + set(complex_only true) + elseif (${ARGV7} EQUAL 3) + set(mangle_complex_sources true) + endif () + endif () + if (no_float_type) set(float_list "DUMMY") # still need to loop once else () set(float_list "${FLOAT_TYPES}") + if (complex_only) + list(REMOVE_ITEM float_list "SINGLE") + list(REMOVE_ITEM float_list "DOUBLE") + elseif (real_only) + list(REMOVE_ITEM float_list "COMPLEX") + list(REMOVE_ITEM float_list "ZCOMPLEX") + endif () endif () set(OBJ_LIST_OUT "") @@ -148,6 +173,12 @@ function(GenerateNamedObjects sources_in) endif () if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") list(APPEND obj_defines "COMPLEX") + if (mangle_complex_sources) + # add a z to the filename + get_filename_component(source_name ${source_file} NAME) + get_filename_component(source_dir ${source_file} DIRECTORY) + string(REPLACE ${source_name} "z${source_name}" source_file ${source_file}) + endif () endif () add_library(${obj_name} OBJECT ${source_file}) diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 739705d17..030a14fd2 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -2,15 +2,25 @@ include_directories(${CMAKE_SOURCE_DIR}) set(BLAS1_SOURCES - axpy.c swap.c - copy.c scal.c - dot.c + copy.c asum.c nrm2.c - rot.c rotg.c rotm.c rotmg.c +) + +set(BLAS1_REAL_ONLY_SOURCES + rotm.c rotmg.c # N.B. 
these do not have complex counterparts +) + +# these will have 'z' prepended for the complex version +set(BLAS1_MANGLED_SOURCES + axpy.c swap.c + scal.c + dot.c + rot.c rotg.c axpby.c ) # TODO: USE_NETLIB_GEMV shoudl switch gemv.c to netlib/*gemv.f +# these all have 'z' sources for complex versions set(BLAS2_SOURCES gemv.c ger.c trsv.c trmv.c symv.c @@ -24,6 +34,9 @@ set(BLAS2_SOURCES set(BLAS3_SOURCES gemm.c symm.c trsm.c syrk.c syr2k.c +) + +set(BLAS3_MANGLED_SOURCES omatcopy.c imatcopy.c ) @@ -41,8 +54,11 @@ endif () foreach (CBLAS_FLAG ${CBLAS_FLAGS}) GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG}) - GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG}) + GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 1) + GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 3) + GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 3) GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG}) + GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 3) # trmm is trsm with a compiler flag set GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) @@ -62,11 +78,18 @@ endforeach () if (NOT DEFINED NO_LAPACK) set(LAPACK_SOURCES + lapack/gesv.c + ) + + # prepend z for complex versions + set(LAPACK_MANGLED_SOURCES lapack/getrf.c lapack/getrs.c lapack/potrf.c lapack/getf2.c - lapack/potf2.c lapack/laswp.c lapack/gesv.c lapack/lauu2.c + lapack/potf2.c lapack/laswp.c lapack/lauu2.c lapack/lauum.c lapack/trti2.c lapack/trtri.c ) + GenerateNamedObjects("${LAPACK_SOURCES}") + GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3) endif () set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS From 67e39bd8fb797cc420b191780fa4ae3ae00792ff Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 17 Feb 2015 13:12:30 -0600 Subject: [PATCH 078/137] Added mangled complex filenames to interface and lapack CMakeLists.txt. 
--- cmake/utils.cmake | 13 +++++++++---- interface/CMakeLists.txt | 20 ++++++++++++++------ lapack/CMakeLists.txt | 40 ++++++++++++++++++++-------------------- 3 files changed, 43 insertions(+), 30 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index aaa669abd..d9c180fb6 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -70,7 +70,7 @@ endfunction () # @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU) # @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters) # @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc) -# @param complex_only/real_only some routines have separate source files for complex and non-complex float types. +# @param complex_filename_scheme some routines have separate source files for complex and non-complex float types. # 0 - compiles for all types # 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE) # 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX) @@ -88,7 +88,7 @@ function(GenerateNamedObjects sources_in) if (DEFINED ARGV3) set(use_cblas ${ARGV3}) else () - set(use_cblas 0) + set(use_cblas false) endif () if (DEFINED ARGV4) @@ -108,7 +108,7 @@ function(GenerateNamedObjects sources_in) set(real_only false) set(complex_only false) set(mangle_complex_sources false) - if (DEFINED ARGV7) + if (DEFINED ARGV7 AND NOT "${ARGV7}" STREQUAL "") if (${ARGV7} EQUAL 1) set(real_only true) elseif (${ARGV7} EQUAL 2) @@ -204,6 +204,7 @@ endfunction () # If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel # @param alternate_name replaces the source name as the object name (define codes are still appended) # @param no_float_type turns off the float type define for this build (e.g. 
SINGLE/DOUBLE/etc) +# @param complex_filename_scheme see GenerateNamedObjects function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme) if (DEFINED ARGV5) @@ -214,6 +215,10 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_de set(no_float_type ${ARGV6}) endif () + if (DEFINED ARGV7) + set(complex_filename_scheme ${ARGV7}) + endif () + AllCombinations("${defines_in}" "${absent_codes_in}") set(define_combos ${LIST_OUT}) set(define_codes ${CODES_OUT}) @@ -271,7 +276,7 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_de endif () endif () - GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" 0 "${replace_code}" "${append_code}" "${no_float_type}") + GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" 0 "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}") list(APPEND COMBO_OBJ_LIST_OUT "${OBJ_LIST_OUT}") endforeach () endforeach () diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 030a14fd2..633b8a6fe 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -31,6 +31,7 @@ set(BLAS2_SOURCES tpsv.c tpmv.c ) +# these do not have separate 'z' sources set(BLAS3_SOURCES gemm.c symm.c trsm.c syrk.c syr2k.c @@ -53,12 +54,19 @@ endif () foreach (CBLAS_FLAG ${CBLAS_FLAGS}) - GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG}) - GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 1) - GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 3) - GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 3) - GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG}) - GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 3) + # TODO: don't compile complex sources with cblas for now, the naming schemes are all different and they will have to be handled separately from 
SINGLE/DOUBLE + set(DISABLE_COMPLEX 0) + set(MANGLE_COMPLEX 3) + if (CBLAS_FLAG EQUAL 1) + set(DISABLE_COMPLEX 1) + set(MANGLE_COMPLEX 1) + endif () + GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) + GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1) + GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) + GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) + GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) + GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) # trmm is trsm with a compiler flag set GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index ed598f22d..26922f50e 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -3,29 +3,36 @@ include_directories(${CMAKE_SOURCE_DIR}) set(LAPACK_SOURCES - getf2/getf2_k.c getrf/getrf_single.c potrf/potrf_U_single.c potrf/potrf_L_single.c - potf2/potf2_U.c - potf2/potf2_L.c - lauu2/lauu2_U.c - lauu2/lauu2_L.c lauum/lauum_U_single.c lauum/lauum_L_single.c ) +# add a 'z' to filename for complex version +set(LAPACK_MANGLED_SOURCES + getf2/getf2_k.c + lauu2/lauu2_U.c + lauu2/lauu2_L.c + potf2/potf2_U.c + potf2/potf2_L.c +) + # sources that need TRANS set +# this has a 'z' version set(TRANS_SOURCES getrs/getrs_single.c ) # sources that need UNIT set +# these do NOT have a z version set(UNIT_SOURCES trtri/trtri_U_single.c trtri/trtri_L_single.c ) +# these have a 'z' version set(UNIT_SOURCES2 trti2/trti2_U.c trti2/trti2_L.c @@ -51,6 +58,7 @@ set(ZLAPACK_SOURCES ) GenerateNamedObjects("${LAPACK_SOURCES}") +GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" "" 3) # TODO: laswp needs arch specific code GenerateNamedObjects("laswp/generic/laswp_k.c" "" "laswp_plus") @@ -64,40 +72,32 @@ if 
(SMP) set(GETRF_SRC getrf/getrf_parallel.c) endif () + # these do not have 'z' versions set(PARALLEL_SOURCES ${GETRF_SRC} - potrf/potrf_U_parallel.c - potrf/potrf_L_parallel.c lauum/lauum_U_parallel.c lauum/lauum_L_parallel.c + potrf/potrf_U_parallel.c + potrf/potrf_L_parallel.c ) + # this has a z version list(APPEND TRANS_SOURCES getrs/getrs_parallel.c ) + # these do NOT have a z version list(APPEND UNIT_SOURCES trtri/trtri_U_parallel.c trtri/trtri_L_parallel.c ) - set(ZPARALLEL_SOURCES - ${GETRF_SRC} - getrs/zgetrs_parallel.c - potrf/potrf_U_parallel.c - potrf/potrf_L_parallel.c - lauum/lauum_U_parallel.c - lauum/lauum_L_parallel.c - trtri/trtri_U_parallel.c - trtri/trtri_L_parallel.c - ) - GenerateNamedObjects("${PARALLEL_SOURCES}") endif () -GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4) +GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4 "" "" 3) GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "" 4) -GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0) +GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0 "" "" 3) set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS From 33c5e8db7f60035c748dc1aac370775025936a34 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 17 Feb 2015 21:36:23 -0600 Subject: [PATCH 079/137] Added a helper function for setting the L1 kernel defaults. Added loop to build objects with different KERNEL defines. --- cmake/kernel.cmake | 110 +++++++++++++++++++++++++++++++++++ cmake/utils.cmake | 47 +++++++++++---- driver/level2/CMakeLists.txt | 8 +-- kernel/CMakeLists.txt | 91 +++++++++++++++-------------- 4 files changed, 196 insertions(+), 60 deletions(-) create mode 100644 cmake/kernel.cmake diff --git a/cmake/kernel.cmake b/cmake/kernel.cmake new file mode 100644 index 000000000..211da229d --- /dev/null +++ b/cmake/kernel.cmake @@ -0,0 +1,110 @@ +# helper functions for the kernel CMakeLists.txt + + +# Set the default filenames for L1 objects. 
Most of these will be overriden by the appropriate KERNEL file. +macro(SetDefaultL1) + set(SAMAXKERNEL amax.S) + set(DAMAXKERNEL amax.S) + set(QAMAXKERNEL amax.S) + set(CAMAXKERNEL zamax.S) + set(ZAMAXKERNEL zamax.S) + set(XAMAXKERNEL zamax.S) + set(SAMINKERNEL amin.S) + set(DAMINKERNEL amin.S) + set(QAMINKERNEL amin.S) + set(CAMINKERNEL zamin.S) + set(ZAMINKERNEL zamin.S) + set(XAMINKERNEL zamin.S) + set(SMAXKERNEL max.S) + set(DMAXKERNEL max.S) + set(QMAXKERNEL max.S) + set(SMINKERNEL min.S) + set(DMINKERNEL min.S) + set(QMINKERNEL min.S) + set(ISAMAXKERNEL iamax.S) + set(IDAMAXKERNEL iamax.S) + set(IQAMAXKERNEL iamax.S) + set(ICAMAXKERNEL izamax.S) + set(IZAMAXKERNEL izamax.S) + set(IXAMAXKERNEL izamax.S) + set(ISAMINKERNEL iamin.S) + set(IDAMINKERNEL iamin.S) + set(IQAMINKERNEL iamin.S) + set(ICAMINKERNEL izamin.S) + set(IZAMINKERNEL izamin.S) + set(IXAMINKERNEL izamin.S) + set(ISMAXKERNEL iamax.S) + set(IDMAXKERNEL iamax.S) + set(IQMAXKERNEL iamax.S) + set(ISMINKERNEL iamin.S) + set(IDMINKERNEL iamin.S) + set(IQMINKERNEL iamin.S) + set(SASUMKERNEL asum.S) + set(DASUMKERNEL asum.S) + set(CASUMKERNEL zasum.S) + set(ZASUMKERNEL zasum.S) + set(QASUMKERNEL asum.S) + set(XASUMKERNEL zasum.S) + set(SAXPYKERNEL axpy.S) + set(DAXPYKERNEL axpy.S) + set(CAXPYKERNEL zaxpy.S) + set(ZAXPYKERNEL zaxpy.S) + set(QAXPYKERNEL axpy.S) + set(XAXPYKERNEL zaxpy.S) + set(SCOPYKERNEL copy.S) + set(DCOPYKERNEL copy.S) + set(CCOPYKERNEL zcopy.S) + set(ZCOPYKERNEL zcopy.S) + set(QCOPYKERNEL copy.S) + set(XCOPYKERNEL zcopy.S) + set(SDOTKERNEL dot.S) + set(DDOTKERNEL dot.S) + set(CDOTKERNEL zdot.S) + set(ZDOTKERNEL zdot.S) + set(QDOTKERNEL dot.S) + set(XDOTKERNEL zdot.S) + set(SNRM2KERNEL nrm2.S) + set(DNRM2KERNEL nrm2.S) + set(QNRM2KERNEL nrm2.S) + set(CNRM2KERNEL znrm2.S) + set(ZNRM2KERNEL znrm2.S) + set(XNRM2KERNEL znrm2.S) + set(SROTKERNEL rot.S) + set(DROTKERNEL rot.S) + set(QROTKERNEL rot.S) + set(CROTKERNEL zrot.S) + set(ZROTKERNEL zrot.S) + set(XROTKERNEL zrot.S) + set(SSCALKERNEL 
scal.S) + set(DSCALKERNEL scal.S) + set(CSCALKERNEL zscal.S) + set(ZSCALKERNEL zscal.S) + set(QSCALKERNEL scal.S) + set(XSCALKERNEL zscal.S) + set(SSWAPKERNEL swap.S) + set(DSWAPKERNEL swap.S) + set(CSWAPKERNEL zswap.S) + set(ZSWAPKERNEL zswap.S) + set(QSWAPKERNEL swap.S) + set(XSWAPKERNEL zswap.S) + set(SGEMVNKERNEL gemv_n.S) + set(SGEMVTKERNEL gemv_t.S) + set(DGEMVNKERNEL gemv_n.S) + set(DGEMVTKERNEL gemv_t.S) + set(CGEMVNKERNEL zgemv_n.S) + set(CGEMVTKERNEL zgemv_t.S) + set(ZGEMVNKERNEL zgemv_n.S) + set(ZGEMVTKERNEL zgemv_t.S) + set(QGEMVNKERNEL gemv_n.S) + set(QGEMVTKERNEL gemv_t.S) + set(XGEMVNKERNEL zgemv_n.S) + set(XGEMVTKERNEL zgemv_t.S) + set(SCABS_KERNEL cabs.S) + set(DCABS_KERNEL cabs.S) + set(QCABS_KERNEL cabs.S) + set(LSAME_KERNEL lsame.S) + set(SAXPBYKERNEL ../arm/axpby.c) + set(DAXPBYKERNEL ../arm/axpby.c) + set(CAXPBYKERNEL ../arm/zaxpby.c) + set(ZAXPBYKERNEL ../arm/zaxpby.c) +endmacro () diff --git a/cmake/utils.cmake b/cmake/utils.cmake index d9c180fb6..9635b210c 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -12,6 +12,27 @@ function(ParseGetArchVars GETARCH_IN) endforeach () endfunction () +# Reads a Makefile into CMake vars. +# TODO: read nested Makefiles (I think 1 level should do) +# TODO: respect IFDEF/IFNDEF? +# TODO: regex replace makefile vars, e.g. 
$(TSUFFIX) is set to the target arch in the var CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) +# TODO: bail when makefile is missing, like -include +function(ParseMakefileVars MAKEFILE_IN) + message(STATUS "Reading vars from ${MAKEFILE_IN}...") + file(STRINGS ${MAKEFILE_IN} makefile_contents) + foreach (makefile_line ${makefile_contents}) + string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}") + if (NOT "${line_match}" STREQUAL "") + set(var_name ${CMAKE_MATCH_1}) + set(var_value ${CMAKE_MATCH_2}) + set(${VAR_NAME} ${VAR_VALUE} PARENT_SCOPE) + message(STATUS "found var ${var_name} = ${var_value}") + else () + message(STATUS "couldn't parse ${makefile_line} into a var") + endif () + endforeach () +endfunction () + # Returns all combinations of the input list, as a list with colon-separated combinations # E.g. input of A B C returns A B C A:B A:C B:C # N.B. The input is meant to be a list, and to past a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}")). @@ -75,6 +96,7 @@ endfunction () # 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE) # 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX) # 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c) +# STRING - compiles only the given type (e.g. 
DOUBLE) function(GenerateNamedObjects sources_in) if (DEFINED ARGV1) @@ -105,6 +127,12 @@ function(GenerateNamedObjects sources_in) set(no_float_type false) endif () + if (no_float_type) + set(float_list "DUMMY") # still need to loop once + else () + set(float_list "${FLOAT_TYPES}") + endif () + set(real_only false) set(complex_only false) set(mangle_complex_sources false) @@ -115,20 +143,17 @@ function(GenerateNamedObjects sources_in) set(complex_only true) elseif (${ARGV7} EQUAL 3) set(mangle_complex_sources true) + elseif (NOT ${ARGV7} EQUAL 0) + set(float_list ${ARGV7}) endif () endif () - if (no_float_type) - set(float_list "DUMMY") # still need to loop once - else () - set(float_list "${FLOAT_TYPES}") - if (complex_only) - list(REMOVE_ITEM float_list "SINGLE") - list(REMOVE_ITEM float_list "DOUBLE") - elseif (real_only) - list(REMOVE_ITEM float_list "COMPLEX") - list(REMOVE_ITEM float_list "ZCOMPLEX") - endif () + if (complex_only) + list(REMOVE_ITEM float_list "SINGLE") + list(REMOVE_ITEM float_list "DOUBLE") + elseif (real_only) + list(REMOVE_ITEM float_list "COMPLEX") + list(REMOVE_ITEM float_list "ZCOMPLEX") endif () set(OBJ_LIST_OUT "") diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index 4524ad688..a1685dbd6 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -28,15 +28,15 @@ set(NU_SOURCES ) # objects that need LOWER set -GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1) +GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3) # objects that need TRANSA and UNIT set # N.B. BLAS wants to put the U/L from the filename in the *MIDDLE* because of course why not have a different naming scheme for every single object -hpa -GenerateCombinationObjects("${NU_SOURCES}" "TRANSA;UNIT" "N;N" "" 3) +GenerateCombinationObjects("${NU_SOURCES}" "TRANSA;UNIT" "N;N" "" 3 "" "" 3) # gbmv uses a lowercase n and t. WHY? WHO KNOWS! 
-GenerateNamedObjects("gbmv_k.c" "" "gbmv_n") -GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t") +GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3) +GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3) if (SMP) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 6c259039b..479b1838f 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -1,67 +1,68 @@ include_directories(${CMAKE_SOURCE_DIR}) +include("${CMAKE_SOURCE_DIR}/cmake/kernel.cmake") # Makeflie +if (DEFINED TARGET_CORE) + #override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) + set(BUILD_KERNEL 1) + set(KDIR "") + set(TSUFFIX "_${TARGET_CORE}") +else () + set(TARGET_CORE ${CORE}) + set(KDIR "") + set(TSUFFIX "") +endif () + +SetDefaultL1() +#-include $(KERNELDIR)/KERNEL.$(TARGET_CORE) +#include $(KERNELDIR)/KERNEL +ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") +ParseMakefileVars("${KERNELDIR}/KERNEL") + if (${ARCH} STREQUAL "x86") GenerateNamedObjects("${KERNELDIR}/cpuid.S" "") endif () -# TODO: Read from ${KERNELDIR}/KERNEL - some architectures use a different lsame -set(LSAME_KERNEL lsame.S) -set(SCABS_KERNEL cabs.S) -set(DCABS_KERNEL cabs.S) # don't use float type name mangling here -GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "F_INTERFACE" "lsame" 0 "" "" 1) -GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "COMPLEX;F_INTERFACE" "scabs1" "" "" 1) -GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE;COMPLEX;F_INTERFACE" "dcabs1" 0 "" "" 1) - +GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "F_INTERFACE" "lsame" false "" "" true) +GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "COMPLEX;F_INTERFACE" "scabs1" false "" "" true) +GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE;COMPLEX;F_INTERFACE" "dcabs1" false "" "" true) # Makefile.L1 # TODO: need to read ${KERNELDIR}/KERNEL into CMake vars -set(DAMAXKERNEL amax.S) -set(DAMINKERNEL amax.S) -set(DMAXKERNEL amax.S) -set(DMINKERNEL amax.S) 
-set(IDAMAXKERNEL iamax.S) -set(IDAMINKERNEL iamax.S) -set(IDMAXKERNEL iamax.S) -set(IDMINKERNEL iamax.S) -set(DASUMKERNEL asum.S) -set(DAXPYKERNEL axpy.S) -set(DCOPYKERNEL copy.S) -set(DDOTKERNEL dot.S) -set(DNRM2KERNEL nrm2.S) -set(DROTKERNEL rot.S) -set(DSCALKERNEL scal.S) -set(DSWAPKERNEL swap.S) -set(DAXPBYKERNEL ../arm/axpby.c) -GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k") -GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k") -GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k") -GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "" "min_k") -GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k") -GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k") -GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k") -GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "" "i*min_k") -GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k") -GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k") -GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k") -GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k") -GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k") -GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k") -GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k") -GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k") -GenerateNamedObjects("${KERNELDIR}/${DAXPBYKERNEL}" "" "axpby_k") +foreach (float_type ${FLOAT_TYPES}) + + # a bit of metaprogramming here to pull out the appropriate KERNEL var + string(SUBSTRING ${float_type} 0 1 float_char) + GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false 
${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "" "min_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "" "i*min_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}NRM2KERNEL}" "" "nrm2_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "rot_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}SCALKERNEL}" "" "scal_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}SWAPKERNEL}" "" "swap_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPBYKERNEL}" "" "axpby_k" false "" "" false ${float_type}) +endforeach () # Makefile.L2 GenerateNamedObjects("${KERNELDIR}/gemv_n.S" "DOUBLE") GenerateNamedObjects("${KERNELDIR}/gemv_t.S" "TRANS") -GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1) -GenerateNamedObjects("generic/ger.c" "" "ger_k") +GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) +GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) # Makefile.L3 
@@ -77,7 +78,7 @@ set(DGEMMITCOPYOBJ gemm_itcopy) set(DGEMMONCOPYOBJ gemm_oncopy) set(DGEMMOTCOPYOBJ gemm_otcopy) -GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel") +GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" "" 3) if (DGEMMINCOPY) GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "" "${DGEMMINCOPYOBJ}") From cebc07cebde00616b9b3facdb4cbe21f3aba3847 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 17 Feb 2015 22:09:41 -0600 Subject: [PATCH 080/137] ParseMakefileVars now recursively parses included makefiles. --- cmake/utils.cmake | 15 +++++++++------ kernel/CMakeLists.txt | 23 +++-------------------- 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 9635b210c..fbb546dbe 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -13,11 +13,9 @@ function(ParseGetArchVars GETARCH_IN) endfunction () # Reads a Makefile into CMake vars. -# TODO: read nested Makefiles (I think 1 level should do) # TODO: respect IFDEF/IFNDEF? # TODO: regex replace makefile vars, e.g. 
$(TSUFFIX) is set to the target arch in the var CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) -# TODO: bail when makefile is missing, like -include -function(ParseMakefileVars MAKEFILE_IN) +macro(ParseMakefileVars MAKEFILE_IN) message(STATUS "Reading vars from ${MAKEFILE_IN}...") file(STRINGS ${MAKEFILE_IN} makefile_contents) foreach (makefile_line ${makefile_contents}) @@ -25,13 +23,18 @@ function(ParseMakefileVars MAKEFILE_IN) if (NOT "${line_match}" STREQUAL "") set(var_name ${CMAKE_MATCH_1}) set(var_value ${CMAKE_MATCH_2}) - set(${VAR_NAME} ${VAR_VALUE} PARENT_SCOPE) + set(${var_name} ${var_value}) message(STATUS "found var ${var_name} = ${var_value}") else () - message(STATUS "couldn't parse ${makefile_line} into a var") + string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}") + if (NOT "${line_match}" STREQUAL "") + ParseMakefileVars(${KERNELDIR}/${CMAKE_MATCH_1}) + else () + message(STATUS "couldn't parse ${makefile_line} into a var") + endif () endif () endforeach () -endfunction () +endmacro () # Returns all combinations of the input list, as a list with colon-separated combinations # E.g. 
input of A B C returns A B C A:B A:C B:C diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 479b1838f..12c27fd50 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -2,7 +2,7 @@ include_directories(${CMAKE_SOURCE_DIR}) include("${CMAKE_SOURCE_DIR}/cmake/kernel.cmake") -# Makeflie +# Makefile if (DEFINED TARGET_CORE) #override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) @@ -31,11 +31,7 @@ GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "COMPLEX;F_INTERFACE" "scabs GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE;COMPLEX;F_INTERFACE" "dcabs1" false "" "" true) # Makefile.L1 - -# TODO: need to read ${KERNELDIR}/KERNEL into CMake vars - foreach (float_type ${FLOAT_TYPES}) - # a bit of metaprogramming here to pull out the appropriate KERNEL var string(SUBSTRING ${float_type} 0 1 float_char) GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type}) @@ -58,27 +54,14 @@ foreach (float_type ${FLOAT_TYPES}) endforeach () # Makefile.L2 - GenerateNamedObjects("${KERNELDIR}/gemv_n.S" "DOUBLE") GenerateNamedObjects("${KERNELDIR}/gemv_t.S" "TRANS") GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) # Makefile.L3 - -# TODO: these are from KERNEL.PENRYN - they should be read in from the appropriate ${KERNELDIR}/KERNEL file -set(DGEMM_BETA ../generic/gemm_beta.c) -set(DGEMMKERNEL gemm_kernel_2x4_penryn.S) -set(DGEMMINCOPY gemm_ncopy_2.S) -set(DGEMMITCOPY gemm_tcopy_2.S) -set(DGEMMONCOPY ../generic/gemm_ncopy_4.c) -set(DGEMMOTCOPY ../generic/gemm_tcopy_4.c) -set(DGEMMINCOPYOBJ gemm_incopy) -set(DGEMMITCOPYOBJ gemm_itcopy) -set(DGEMMONCOPYOBJ gemm_oncopy) -set(DGEMMOTCOPYOBJ gemm_otcopy) - -GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" "" 3) +message(STATUS "dgemm: ${DGEMMKERNEL}") +GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" 
false "" "" false 3) if (DGEMMINCOPY) GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "" "${DGEMMINCOPYOBJ}") From 14fd3d35de9b077d771782e35a1de89d1cb9a615 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 18 Feb 2015 10:25:01 -0600 Subject: [PATCH 081/137] Added checks for missing defines in kernel. --- kernel/CMakeLists.txt | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 12c27fd50..9f07157e3 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -36,12 +36,20 @@ foreach (float_type ${FLOAT_TYPES}) string(SUBSTRING ${float_type} 0 1 float_char) GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "" "min_k" false "" "" false ${float_type}) + if (DEFINED ${float_char}MAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type}) + endif () + if (DEFINED ${float_char}MINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "" "min_k" false "" "" false ${float_type}) + endif () GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "" "i*min_k" false "" "" false ${float_type}) + if (DEFINED I${float_char}MAXKERNEL) + 
GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type}) + endif () + if (DEFINED I${float_char}MINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "" "i*min_k" false "" "" false ${float_type}) + endif () GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type}) @@ -60,8 +68,10 @@ GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) # Makefile.L3 -message(STATUS "dgemm: ${DGEMMKERNEL}") -GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false 3) +foreach (float_type ${FLOAT_TYPES}) + string(SUBSTRING ${float_type} 0 1 float_char) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) +endforeach () if (DGEMMINCOPY) GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "" "${DGEMMINCOPYOBJ}") From 43725b82c5fa459ecc0ec98d21cee4c751cd33fd Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 18 Feb 2015 12:23:17 -0600 Subject: [PATCH 082/137] ParseMakefileVars now replaces Makefile vars with CMake vars. --- cmake/utils.cmake | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index fbb546dbe..c77b762e6 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -14,7 +14,6 @@ endfunction () # Reads a Makefile into CMake vars. # TODO: respect IFDEF/IFNDEF? -# TODO: regex replace makefile vars, e.g. 
$(TSUFFIX) is set to the target arch in the var CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) macro(ParseMakefileVars MAKEFILE_IN) message(STATUS "Reading vars from ${MAKEFILE_IN}...") file(STRINGS ${MAKEFILE_IN} makefile_contents) @@ -23,14 +22,19 @@ macro(ParseMakefileVars MAKEFILE_IN) if (NOT "${line_match}" STREQUAL "") set(var_name ${CMAKE_MATCH_1}) set(var_value ${CMAKE_MATCH_2}) + # check for Makefile variables in the string, e.g. $(TSUFFIX) + string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches ${var_value}) + foreach (make_var ${make_var_matches}) + # strip out Makefile $() markup + string(REGEX REPLACE "\\$\\(([0-9_a-zA-Z]+)\\)" "\\1" make_var ${make_var}) + # now replace the instance of the Makefile variable with the value of the CMake variable (note the double quote) + string(REPLACE "$(${make_var})" "${${make_var}}" var_value ${var_value}) + endforeach () set(${var_name} ${var_value}) - message(STATUS "found var ${var_name} = ${var_value}") else () string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}") if (NOT "${line_match}" STREQUAL "") ParseMakefileVars(${KERNELDIR}/${CMAKE_MATCH_1}) - else () - message(STATUS "couldn't parse ${makefile_line} into a var") endif () endif () endforeach () @@ -106,8 +110,10 @@ function(GenerateNamedObjects sources_in) set(defines_in ${ARGV1}) endif () - if (DEFINED ARGV2) + if (DEFINED ARGV2 AND NOT "${ARGV2}" STREQUAL "") set(name_in ${ARGV2}) + # strip off extension for kernel files that pass in the object name. + get_filename_component(name_in ${name_in} NAME_WE) endif () if (DEFINED ARGV3) From 94922980486c297c919cfe44fc96a52a3557c1f6 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 18 Feb 2015 13:01:05 -0600 Subject: [PATCH 083/137] Added other float types to Makefile.L3. 
--- kernel/CMakeLists.txt | 194 +++++++++++++++++++++++------------------- 1 file changed, 105 insertions(+), 89 deletions(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 9f07157e3..be86094f1 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -71,97 +71,113 @@ GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) foreach (float_type ${FLOAT_TYPES}) string(SUBSTRING ${float_type} 0 1 float_char) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) + + if (${float_char}GEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "" "${${float_char}GEMMINCOPYOBJ}" false "" "" false ${float_type}) + endif () + + if (${float_char}GEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMITCOPY}" "" "${${float_char}GEMMITCOPYOBJ}" false "" "" false ${float_type}) + endif () + + if (${float_char}GEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMONCOPY}" "" "${${float_char}GEMMONCOPYOBJ}" false "" "" false ${float_type}) + endif () + + if (${float_char}GEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMOTCOPY}" "" "${${float_char}GEMMOTCOPYOBJ}" false "" "" false ${float_type}) + endif () + + GenerateCombinationObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false ${float_type}) + + # These don't use a scheme that is easy to iterate 
over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define. + # Could simplify it a bit by pairing up by -UUNIT/-DUNIT. + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iutncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type}) + 
GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" 
"OUTER" "trsm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "" "symm_iutcopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER" "symm_iltcopy" false "" "" false ${float_type}) + + if (NOT DEFINED ${float_char}OMATCOPY_CN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CN ../arm/zomatcopy_cn.c) + else () + set(${float_char}OMATCOPY_CN ../arm/omatcopy_cn.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_RN ../arm/zomatcopy_rn.c) + else () + set(${float_char}OMATCOPY_RN ../arm/omatcopy_rn.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_CT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CT ../arm/zomatcopy_ct.c) + else () + set(${float_char}OMATCOPY_CT ../arm/omatcopy_ct.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL 
"C") + set(${float_char}OMATCOPY_RT ../arm/zomatcopy_rt.c) + else () + set(${float_char}OMATCOPY_RT ../arm/omatcopy_rt.c) + endif () + endif () + + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "domatcopy_k_cn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "domatcopy_k_rn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "domatcopy_k_ct" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "domatcopy_k_rt" false "" "" false ${float_type}) + endforeach () -if (DGEMMINCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "" "${DGEMMINCOPYOBJ}") -endif () - -if (DGEMMITCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "" "${DGEMMITCOPYOBJ}") -endif () - -if (DGEMMONCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "" "${DGEMMONCOPYOBJ}") -endif () - -if (DGEMMOTCOPY) - GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "" "${DGEMMOTCOPYOBJ}") -endif () - -GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}") -GenerateCombinationObjects("${KERNELDIR}/${DGEMMKERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel") -GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN") -GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "LT;TRSMKERNEL" "trsm_kernel_LT") -GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN") -GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "RT;TRSMKERNEL" "trsm_kernel_RT") - -# These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define. -# Could simplify it a bit by pairing up by -UUNIT/-DUNIT. 
-GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy") -GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "" "trmm_iunncopy") -GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "OUTER;UNIT" "trmm_ounucopy") -GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "OUTER" "trmm_ounncopy") - -GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy") -GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy") -GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy") -GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER" "trmm_olnncopy") - -GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy") -GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "" "trmm_iutncopy") -GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "OUTER;UNIT" "trmm_outucopy") -GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "OUTER" "trmm_outncopy") - -GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy") -GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy") -GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy") -GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER" "trmm_oltncopy") - -GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy") -GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "" "trsm_iunncopy") -GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "OUTER;UNIT" "trsm_ounucopy") -GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "OUTER" "trsm_ounncopy") - -GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy") -GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "LOWER" 
"trsm_ilnncopy") -GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy") -GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER" "trsm_olnncopy") - -GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy") -GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "" "trsm_iutncopy") -GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "OUTER;UNIT" "trsm_outucopy") -GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "OUTER" "trsm_outncopy") - -GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy") -GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy") -GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy") -GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER" "trsm_oltncopy") - -GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "OUTER" "symm_outcopy") -GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "" "symm_iutcopy") - -GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy") -GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "LOWER" "symm_iltcopy") - -if (NOT DEFINED DOMATCOPY_CN) - set(DOMATCOPY_CN ../arm/omatcopy_cn.c) -endif () -if (NOT DEFINED DOMATCOPY_RN) - set(DOMATCOPY_RN ../arm/omatcopy_rn.c) -endif () -if (NOT DEFINED DOMATCOPY_CT) - set(DOMATCOPY_CT ../arm/omatcopy_ct.c) -endif () -if (NOT DEFINED DOMATCOPY_RT) - set(DOMATCOPY_RT ../arm/omatcopy_rt.c) -endif () - -GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CN}" "" "domatcopy_k_cn") -GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RN}" "ROWM" "domatcopy_k_rn") -GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CT}" "" "domatcopy_k_ct") -GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RT}" "ROWM" "domatcopy_k_rt") - # Makefile.LA #DBLASOBJS += dneg_tcopy$(TSUFFIX).$(SUFFIX) 
dlaswp_ncopy$(TSUFFIX).$(SUFFIX) From f3f2b3d76836e5c2758be7053c7e4abbc3fac311 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 19 Feb 2015 12:26:11 -0600 Subject: [PATCH 084/137] Added complex and single netlib-lapack fortran sources to lapack.cmake. --- cmake/lapack.cmake | 411 +++++++++++++++++++++++++++++++++--------- kernel/CMakeLists.txt | 1 + lapack/CMakeLists.txt | 6 +- 3 files changed, 333 insertions(+), 85 deletions(-) diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index 14581a9b2..e8d19f10d 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -1,96 +1,343 @@ # Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files. set(ALLAUX - ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f - ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f + ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f + ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f ../INSTALL/ilaver.f ../INSTALL/slamch.f ) -set(DZLAUX - dbdsdc.f - dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f - dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f - dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f - dlagts.f dlamrg.f dlanst.f - dlapy2.f dlapy3.f dlarnv.f - dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f - dlarrk.f dlarrr.f dlaneg.f - dlartg.f dlaruv.f dlas2.f dlascl.f - dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f - dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f - dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f - dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f - dsteqr.f dsterf.f dlaisnan.f disnan.f - dlartgp.f dlartgs.f - ../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f +set(SCLAUX + sbdsdc.f + sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f + slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f + slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f + slagts.f slamrg.f slanst.f + slapy2.f 
slapy3.f slarnv.f + slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f + slarrk.f slarrr.f slaneg.f + slartg.f slaruv.f slas2.f slascl.f + slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f + slasd7.f slasd8.f slasda.f slasdq.f slasdt.f + slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f + slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f + ssteqr.f ssterf.f slaisnan.f sisnan.f + slartgp.f slartgs.f + ../INSTALL/second_${TIMER}.f ) +set(DZLAUX + dbdsdc.f + dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f + dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f + dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f + dlagts.f dlamrg.f dlanst.f + dlapy2.f dlapy3.f dlarnv.f + dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f + dlarrk.f dlarrr.f dlaneg.f + dlartg.f dlaruv.f dlas2.f dlascl.f + dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f + dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f + dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f + dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f + dsteqr.f dsterf.f dlaisnan.f disnan.f + dlartgp.f dlartgs.f + ../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f +) + +set(SLASRC + sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f + sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f + sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f + sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f + sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f + sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f + sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f + sgetc2.f sgetri.f + sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f + sggglm.f sgghrd.f sgglse.f sggqrf.f + sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f + sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f + shsein.f shseqr.f slabrd.f slacon.f slacn2.f + slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f + 
slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f + slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f + slansy.f slantb.f slantp.f slantr.f slanv2.f + slapll.f slapmt.f + slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f + slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f + slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f + slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f + slarrv.f slartv.f + slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f + slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f + sopgtr.f sopmtr.f sorg2l.f sorg2r.f + sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f + sorgrq.f sorgtr.f sorm2l.f sorm2r.f + sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f + sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f + spbstf.f spbsv.f spbsvx.f + spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f + sposvx.f spstrf.f spstf2.f + sppcon.f sppequ.f + spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f + spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f + ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f + ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f + sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f + ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f + sstevx.f + ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f + ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f + ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f + ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f + ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f + ssytri_rook.f ssycon_rook.f ssysv_rook.f + stbcon.f + stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f + stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f + stptrs.f + strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f + strtrs.f stzrqf.f stzrzf.f sstemr.f + slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f 
stfttp.f + stfttr.f stpttf.f stpttr.f strttf.f strttp.f + sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f + sgeequb.f ssyequb.f spoequb.f sgbequb.f + sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f + sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f + sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f + stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f +) + +set(DSLASRC spotrs.f) + +set(CLASRC + cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f + cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f + cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f + cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f + cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f + cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f + cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f + cgesvx.f cgetc2.f cgetri.f + cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f + cgghrd.f cgglse.f cggqrf.f cggrqf.f + cggsvd.f cggsvp.f + cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f + chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f + checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f + chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f + chetf2.f chetrd.f + chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f + chetrs.f chetrs2.f + chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f + chgeqz.f chpcon.f chpev.f chpevd.f + chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f + chpsvx.f + chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f + clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f + claed0.f claed7.f claed8.f + claein.f claesy.f claev2.f clags2.f clagtm.f + clahef.f clahef_rook.f clahqr.f + clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f + clanhb.f clanhe.f + clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f + clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f + claqhb.f claqhe.f 
claqhp.f claqp2.f claqps.f claqsb.f + claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f + claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f + clarf.f clarfb.f clarfg.f clarft.f clarfgp.f + clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f + clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f + clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f + clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f + cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f + cposv.f cposvx.f cpstrf.f cpstf2.f + cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f + cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f + crot.f cspcon.f csprfs.f cspsv.f + cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f + cstegr.f cstein.f csteqr.f + csycon.f + csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f + csyswapr.f csytrs.f csytrs2.f csyconv.f + csytf2_rook.f csytrf_rook.f csytrs_rook.f + csytri_rook.f csycon_rook.f csysv_rook.f + ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f + ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f + ctprfs.f ctptri.f + ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f + ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f + cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f + cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f + cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f + cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f + chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f + ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f + cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f + cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f + cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f + cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f + ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f +) + +set(ZCLASRC cpotrs.f) + set(DLASRC - dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f - 
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f - dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f - dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f - dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f - dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f - dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f - dgetc2.f dgetri.f - dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f - dggglm.f dgghrd.f dgglse.f dggqrf.f - dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f - dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f - dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f - dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f - dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f - dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f - dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f - dlapll.f dlapmt.f - dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f - dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f - dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f - dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f - dlargv.f dlarrv.f dlartv.f - dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f - dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f - dopgtr.f dopmtr.f dorg2l.f dorg2r.f - dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f - dorgrq.f dorgtr.f dorm2l.f dorm2r.f - dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f - dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f - dpbstf.f dpbsv.f dpbsvx.f - dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f - dposvx.f dpotrs.f dpstrf.f dpstf2.f - dppcon.f dppequ.f - dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f - dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f - dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f - dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f - dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f - dsptrf.f 
dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f - dstevx.f - dsycon.f dsyev.f dsyevd.f dsyevr.f - dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f - dsysv.f dsysvx.f - dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f - dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f - dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f - dsytri_rook.f dsycon_rook.f dsysv_rook.f - dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f - dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f - dtptrs.f - dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f - dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f - dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f - dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f - dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f - dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f - dgeequb.f dsyequb.f dpoequb.f dgbequb.f - dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f - dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f - dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f - dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f + dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f + dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f + dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f + dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f + dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f + dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f + dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f + dgetc2.f dgetri.f + dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f + dggglm.f dgghrd.f dgglse.f dggqrf.f + dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f + dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f + dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f + dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f + dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f + dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f + dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f + 
dlapll.f dlapmt.f + dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f + dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f + dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f + dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f + dlargv.f dlarrv.f dlartv.f + dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f + dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f + dopgtr.f dopmtr.f dorg2l.f dorg2r.f + dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f + dorgrq.f dorgtr.f dorm2l.f dorm2r.f + dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f + dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f + dpbstf.f dpbsv.f dpbsvx.f + dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f + dposvx.f dpotrs.f dpstrf.f dpstf2.f + dppcon.f dppequ.f + dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f + dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f + dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f + dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f + dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f + dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f + dstevx.f + dsycon.f dsyev.f dsyevd.f dsyevr.f + dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f + dsysv.f dsysvx.f + dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f + dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f + dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f + dsytri_rook.f dsycon_rook.f dsysv_rook.f + dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f + dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f + dtptrs.f + dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f + dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f + dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f + dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f + dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f + dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f + dgeequb.f dsyequb.f 
dpoequb.f dgbequb.f + dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f + dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f + dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f + dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f ) -set(LA_REL_SRC ${ALLAUX} ${DZLAUX} ${DLASRC}) +set(ZLASRC + zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f + zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f + zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f + zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f + zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f + zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f + zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f + zgetri.f + zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f + zgghrd.f zgglse.f zggqrf.f zggrqf.f + zggsvd.f zggsvp.f + zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f + zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f + zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f + zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f + zhetf2.f zhetrd.f + zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f + zhetrs.f zhetrs2.f + zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f + zhgeqz.f zhpcon.f zhpev.f zhpevd.f + zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f + zhpsvx.f + zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f + zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f + zlaed0.f zlaed7.f zlaed8.f + zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f + zlahef.f zlahef_rook.f zlahqr.f + zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f + zlangt.f zlanhb.f + zlanhe.f + zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f + zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f + zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f + zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f + zlaqsp.f 
zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f + zlarcm.f zlarf.f zlarfb.f + zlarfg.f zlarft.f zlarfgp.f + zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f + zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f + zlassq.f zlasyf.f zlasyf_rook.f + zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f + zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f + zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f + zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f + zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f + zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f + zrot.f zspcon.f zsprfs.f zspsv.f + zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f + zstegr.f zstein.f zsteqr.f + zsycon.f + zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f + zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f + zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f + zsytri_rook.f zsycon_rook.f zsysv_rook.f + ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f + ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f + ztprfs.f ztptri.f + ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f + ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f + zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f + zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f + zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f + zunmtr.f zupgtr.f + zupmtr.f izmax1.f dzsum1.f zstemr.f + zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f + zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f + ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f + zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f + zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f + zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f + zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f + ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f +) + +set(LA_REL_SRC ${ALLAUX}) +if (BUILD_SINGLE) + list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX}) +endif () + 
+if (BUILD_DOUBLE) + list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX}) +endif () + +if (BUILD_COMPLEX) + list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX}) +endif () + +if (BUILD_COMPLEX16) + list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX}) +endif () # add lapack-netlib folder to the sources set(LA_SOURCES "") diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index be86094f1..f2b66ba1d 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -88,6 +88,7 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMOTCOPY}" "" "${${float_char}GEMMOTCOPYOBJ}" false "" "" false ${float_type}) endif () + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_BETA}" "" "" false "" "" false ${float_type}) GenerateCombinationObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false ${float_type}) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index 26922f50e..7e17de7de 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -58,11 +58,11 @@ set(ZLAPACK_SOURCES ) GenerateNamedObjects("${LAPACK_SOURCES}") -GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" "" 3) +GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" false "" "" false 3) # TODO: laswp needs arch specific code -GenerateNamedObjects("laswp/generic/laswp_k.c" "" "laswp_plus") -GenerateNamedObjects("laswp/generic/laswp_k.c" "MINUS" "laswp_minus") +GenerateNamedObjects("laswp/generic/laswp_k.c" "" "laswp_plus" false "" "" false 3) +GenerateNamedObjects("laswp/generic/laswp_k.c" "MINUS" "laswp_minus" false "" "" false 3) if (SMP) From e27c372e53fcaceed66193440fbbd450a8d6e251 Mon Sep 17 00:00:00 2001 From: Hank 
Anderson Date: Thu, 19 Feb 2015 13:53:29 -0600 Subject: [PATCH 085/137] Fixed reuse of float_char from parent loop. Fixed in/it/on/otcopy names. --- cmake/utils.cmake | 1 + kernel/CMakeLists.txt | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index c77b762e6..b8f166fb0 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -165,6 +165,7 @@ function(GenerateNamedObjects sources_in) list(REMOVE_ITEM float_list "ZCOMPLEX") endif () + set(float_char "") set(OBJ_LIST_OUT "") foreach (float_type ${float_list}) foreach (source_file ${sources_in}) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index f2b66ba1d..50dbabb91 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -73,19 +73,19 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) if (${float_char}GEMMINCOPY) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "" "${${float_char}GEMMINCOPYOBJ}" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type}) endif () if (${float_char}GEMMITCOPY) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMITCOPY}" "" "${${float_char}GEMMITCOPYOBJ}" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMITCOPY}" "${float_type}" "${${float_char}GEMMITCOPYOBJ}" false "" "" true ${float_type}) endif () if (${float_char}GEMMONCOPY) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMONCOPY}" "" "${${float_char}GEMMONCOPYOBJ}" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMONCOPY}" "${float_type}" "${${float_char}GEMMONCOPYOBJ}" false "" "" true ${float_type}) endif () if (${float_char}GEMMOTCOPY) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMOTCOPY}" "" 
"${${float_char}GEMMOTCOPYOBJ}" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMOTCOPY}" "${float_type}" "${${float_char}GEMMOTCOPYOBJ}" false "" "" true ${float_type}) endif () GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_BETA}" "" "" false "" "" false ${float_type}) From 714638c187deff177fb6045634f7236218c610b7 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 19 Feb 2015 16:11:51 -0600 Subject: [PATCH 086/137] Added some TRMM objects for complex types. --- cmake/utils.cmake | 1 - kernel/CMakeLists.txt | 43 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 37 insertions(+), 7 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index b8f166fb0..b706dfc03 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -202,7 +202,6 @@ function(GenerateNamedObjects sources_in) endif () list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"") - list(APPEND obj_defines ${defines_in}) if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX") list(APPEND obj_defines "DOUBLE") endif () diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 50dbabb91..d91b288fd 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -68,6 +68,17 @@ GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) # Makefile.L3 +set(USE_GEMM3M false) +set(USE_TRMM false) + +if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS") + set(USE_GEMM3M true) +endif () + +if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC") + set(USE_TRMM true) +endif () + foreach (float_type ${FLOAT_TYPES}) string(SUBSTRING ${float_type} 0 1 float_char) 
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) @@ -88,12 +99,32 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMOTCOPY}" "${float_type}" "${${float_char}GEMMOTCOPYOBJ}" false "" "" true ${float_type}) endif () - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_BETA}" "" "" false "" "" false ${float_type}) - GenerateCombinationObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMM_BETA}" "" "gemm_beta" false "" "" false ${float_type}) + + if (USE_TRMM) + set(TRMM_KERNEL "${${float_char}TRMMKERNEL}") + else () + set(TRMM_KERNEL "${${float_char}GEMMKERNEL}") + endif () + + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + # screw it, just enumerate all these. there is an extra define for these indicating which side is a conjugate (e.g. 
CN NC NN) that I don't really want to work into GenerateCombinationObjects + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;NN" "trmm_kernel_LN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA;NN" "trmm_kernel_LT" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;CONJ;CN" "trmm_kernel_LR" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA;CONJ;CN" "trmm_kernel_LC" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "NN" "trmm_kernel_RN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRANSA;NN" "trmm_kernel_RT" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "CONJ;NC" "trmm_kernel_RR" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRANSA;CONJ;NC" "trmm_kernel_RC" false "" "" false ${float_type}) + else () + GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) + endif () + + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false ${float_type}) # These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define. # Could simplify it a bit by pairing up by -UUNIT/-DUNIT. 
From e5897ecb9bb152c5ea25491a14f838ae1d90b0a1 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 19 Feb 2015 16:19:56 -0600 Subject: [PATCH 087/137] Added zherk_kernel.c objects to driver/level3. --- driver/level3/CMakeLists.txt | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 7259a87e7..53c72538b 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -3,19 +3,7 @@ include_directories(${CMAKE_SOURCE_DIR}) set(USE_GEMM3M 0) if (DEFINED ARCH) - if (${ARCH} STREQUAL "x86") - set(USE_GEMM3M 1) - endif () - - if (${ARCH} STREQUAL "x86_64") - set(USE_GEMM3M 1) - endif () - - if (${ARCH} STREQUAL "ia64") - set(USE_GEMM3M 1) - endif () - - if (${ARCH} STREQUAL "MIPS") + if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS") set(USE_GEMM3M 1) endif () endif () @@ -48,6 +36,14 @@ if (SMP) endif () endif () +foreach (float_type ${FLOAT_TYPES}) + set(VERBOSE_GEN true) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + GenerateCombinationObjects("zherk_kernel" "LOWER;CONJ" "U;N" "HERK" 2 "herk_kernel" false ${float_type}) + endif () + set(VERBOSE_GEN false) +endforeach () + #HPLOBJS = # dgemm_nn.c dgemm_nt.c dgemm_tn.c dgemm_tt.c # dtrsm_LNUU.c dtrsm_LNUN.c dtrsm_LNLU.c dtrsm_LNLN.c From 8a143516e33927fd0ef047452313e2d9026c2f89 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Fri, 20 Feb 2015 17:03:33 -0600 Subject: [PATCH 088/137] Added alternate_name to a couple of the name mangling schemes. Added zherk_k sources to driver/level3. 
--- cmake/utils.cmake | 17 ++++++++++++++--- driver/level3/CMakeLists.txt | 9 ++++++--- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index b706dfc03..11f2babd5 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -241,14 +241,17 @@ endfunction () # @param complex_filename_scheme see GenerateNamedObjects function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme) + set(alternate_name_in "") if (DEFINED ARGV5) - set(alternate_name ${ARGV5}) + set(alternate_name_in ${ARGV5}) endif () + set(no_float_type false) if (DEFINED ARGV6) set(no_float_type ${ARGV6}) endif () + set(complex_filename_scheme "") if (DEFINED ARGV7) set(complex_filename_scheme ${ARGV7}) endif () @@ -268,6 +271,8 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_de foreach (source_file ${sources_in}) + set(alternate_name ${alternate_name_in}) + # replace colon separated list with semicolons, this turns it into a CMake list that we can use foreach with string(REPLACE ":" ";" define_combo ${define_combo}) @@ -287,8 +292,12 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_de if (replace_scheme EQUAL 2) set(append_code "_${define_code}") elseif (replace_scheme EQUAL 3) + if ("${alternate_name}" STREQUAL "") + string(REGEX MATCH "[a-zA-Z]\\." last_letter ${source_file}) + else () + string(REGEX MATCH "[a-zA-Z]$" last_letter ${alternate_name}) + endif () # first extract the last letter - string(REGEX MATCH "[a-zA-Z]\\." 
last_letter ${source_file}) string(SUBSTRING ${last_letter} 0 1 last_letter) # remove period from match # break the code up into the first letter and the remaining (should only be 2 anyway) string(SUBSTRING ${define_code} 0 1 define_code_first) @@ -296,7 +305,9 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_de set(replace_code "${define_code_first}${last_letter}${define_code_second}") elseif (replace_scheme EQUAL 4) # insert code before the last underscore and pass that in as the alternate_name - get_filename_component(alternate_name ${source_file} NAME_WE) + if ("${alternate_name}" STREQUAL "") + get_filename_component(alternate_name ${source_file} NAME_WE) + endif () set(extra_underscore "") # check if filename has two underscores, insert another if not (e.g. getrs_parallel needs to become getrs_U_parallel not getrsU_parallel) string(REGEX MATCH "_[a-zA-Z]+_" underscores ${alternate_name}) diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 53c72538b..d9c66db59 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -37,11 +37,14 @@ if (SMP) endif () foreach (float_type ${FLOAT_TYPES}) - set(VERBOSE_GEN true) if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") - GenerateCombinationObjects("zherk_kernel" "LOWER;CONJ" "U;N" "HERK" 2 "herk_kernel" false ${float_type}) + GenerateCombinationObjects("zherk_kernel.c" "LOWER;CONJ" "U;N" "HERK" 2 "herk_kernel" false ${float_type}) + # TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination + GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type}) + GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type}) + GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type}) + GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 
"herk_thread_C" false ${float_type}) endif () - set(VERBOSE_GEN false) endforeach () #HPLOBJS = From 371071d461e37f2a1c62a6cec8ac40ad2190b75e Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Sat, 21 Feb 2015 10:59:02 -0600 Subject: [PATCH 089/137] Added CONJ defines for trmm/trsm. --- driver/level3/CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index d9c66db59..85bde071d 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -44,6 +44,15 @@ foreach (float_type ${FLOAT_TYPES}) GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type}) GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type}) GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type}) + # Need to set CONJ for trmm and trsm + GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type}) + GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trmm_LC" false ${float_type}) + GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_RR" false ${float_type}) + GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trmm_RC" false ${float_type}) + GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_LR" false ${float_type}) + GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type}) + GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type}) + GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type}) endif () endforeach () From fb5d5bb9717e5dde95857cdc03e0e7f0dd86f246 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Sat, 21 Feb 2015 12:39:03 -0600 Subject: [PATCH 090/137] Added defines for complex trmv. 
--- driver/level2/CMakeLists.txt | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index a1685dbd6..8b37917a6 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -17,13 +17,11 @@ set(NU_SOURCES tbsv_U.c tpmv_U.c tpsv_U.c - trmv_U.c trsv_U.c tbmv_L.c tbsv_L.c tpmv_L.c tpsv_L.c - trmv_L.c trsv_L.c ) @@ -38,6 +36,22 @@ GenerateCombinationObjects("${NU_SOURCES}" "TRANSA;UNIT" "N;N" "" 3 "" "" 3) GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3) GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3) +# special defines for complex trmv +foreach (float_type ${FLOAT_TYPES}) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + GenerateCombinationObjects("ztrmv_U.c" "UNIT" "N" "TRANSA=1" 0 "trmv_NU" false ${float_type}) + GenerateCombinationObjects("ztrmv_U.c" "UNIT" "N" "TRANSA=2" 0 "trmv_TL" false ${float_type}) + GenerateCombinationObjects("ztrmv_U.c" "UNIT" "N" "TRANSA=3" 0 "trmv_RU" false ${float_type}) + GenerateCombinationObjects("ztrmv_U.c" "UNIT" "N" "TRANSA=4" 0 "trmv_CL" false ${float_type}) + GenerateCombinationObjects("ztrmv_L.c" "UNIT" "N" "TRANSA=1" 0 "trmv_NL" false ${float_type}) + GenerateCombinationObjects("ztrmv_L.c" "UNIT" "N" "TRANSA=2" 0 "trmv_TU" false ${float_type}) + GenerateCombinationObjects("ztrmv_L.c" "UNIT" "N" "TRANSA=3" 0 "trmv_RL" false ${float_type}) + GenerateCombinationObjects("ztrmv_L.c" "UNIT" "N" "TRANSA=4" 0 "trmv_CU" false ${float_type}) + else () + GenerateCombinationObjects("trmv_U.c;trmv_L.c" "TRANSA;UNIT" "N;N" "" 3 "" false ${float_type}) + endif () +endforeach () + if (SMP) # gbmv uses a lowercase n and t. N.B. this uses TRANSA where gbmv.c uses TRANS. Intentional? 
@@ -69,6 +83,13 @@ if (SMP) GenerateCombinationObjects("${NU_SMP_SOURCES}" "TRANSA;LOWER;UNIT" "N;U;N" "" 2) + foreach (float_type ${FLOAT_TYPES}) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + GenerateCombinationObjects("trmv_thread.c" "LOWER;UNIT" "U;N" "TRANSA=3" 0 "trmv_thread_R" false ${float_type}) + GenerateCombinationObjects("trmv_thread.c" "LOWER;UNIT" "U;N" "TRANSA=4" 0 "trmv_thread_C" false ${float_type}) + endif () + endforeach () + endif () set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS From a6116e585974950cc5a4ccbadf519fb647767455 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Sun, 22 Feb 2015 17:49:28 -0600 Subject: [PATCH 091/137] Added some more complex-only objects. --- cmake/utils.cmake | 2 +- interface/CMakeLists.txt | 18 ++++++++++++++++++ kernel/CMakeLists.txt | 11 ++++++++--- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 11f2babd5..edf25cdb2 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -321,7 +321,7 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_de endif () endif () - GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" 0 "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}") + GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" false "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}") list(APPEND COMBO_OBJ_LIST_OUT "${OBJ_LIST_OUT}") endforeach () endforeach () diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 633b8a6fe..33464c3cd 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -84,6 +84,24 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS}) endforeach () +# complex-specific sources +foreach (float_type ${FLOAT_TYPES}) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + 
GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type}) + GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type}) + endif () + if (${float_type} STREQUAL "COMPLEX") + GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX") + GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX") + endif () + + if (${float_type} STREQUAL "ZCOMPLEX") + GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX") + GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX") + endif () +endforeach () + + if (NOT DEFINED NO_LAPACK) set(LAPACK_SOURCES lapack/gesv.c diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index d91b288fd..84ec428ba 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -16,8 +16,6 @@ else () endif () SetDefaultL1() -#-include $(KERNELDIR)/KERNEL.$(TARGET_CORE) -#include $(KERNELDIR)/KERNEL ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") ParseMakefileVars("${KERNELDIR}/KERNEL") @@ -53,12 +51,19 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}NRM2KERNEL}" "" "nrm2_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "rot_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}SCALKERNEL}" "" "scal_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}SWAPKERNEL}" "" "swap_k" false "" "" false ${float_type}) 
GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPBYKERNEL}" "" "axpby_k" false "" "" false ${float_type}) + + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dotu_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "CONJ" "dotc_k" false "" "" false ${float_type}) + else () + GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) + endif () endforeach () # Makefile.L2 From b2284647a3dc3192d08763b835f69bd6df61ea04 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Mon, 23 Feb 2015 07:51:05 -0600 Subject: [PATCH 092/137] More complex objects. --- interface/CMakeLists.txt | 1 - kernel/CMakeLists.txt | 24 +++++++++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 33464c3cd..1ca554307 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -94,7 +94,6 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX") GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX") endif () - if (${float_type} STREQUAL "ZCOMPLEX") GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX") GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX") diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 84ec428ba..656090cf8 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -71,6 +71,15 @@ GenerateNamedObjects("${KERNELDIR}/gemv_n.S" "DOUBLE") GenerateNamedObjects("${KERNELDIR}/gemv_t.S" "TRANS") GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) +foreach (float_type 
${FLOAT_TYPES}) + string(SUBSTRING ${float_type} 0 1 float_char) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ" "gerc_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "XCONJ" "gerv_k" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ;XCONJ" "gerd_k" false "" "" false ${float_type}) + endif () +endforeach () # Makefile.L3 set(USE_GEMM3M false) @@ -113,7 +122,14 @@ foreach (float_type ${FLOAT_TYPES}) endif () if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") - # screw it, just enumerate all these. there is an extra define for these indicating which side is a conjugate (e.g. CN NC NN) that I don't really want to work into GenerateCombinationObjects + + # just enumerate all these. there is an extra define for these indicating which side is a conjugate (e.g. 
CN NC NN) that I don't really want to work into GenerateCombinationObjects + + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;NN" "trmm_kernel_LN" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA;NN" "trmm_kernel_LT" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;CONJ;CN" "trmm_kernel_LR" false "" "" false ${float_type}) @@ -122,6 +138,12 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRANSA;NN" "trmm_kernel_RT" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "CONJ;NC" "trmm_kernel_RR" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRANSA;CONJ;NC" "trmm_kernel_RC" false "" "" false ${float_type}) + + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "UPPER;LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type}) + else () GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" 
false ${float_type}) endif () From 1b7f427401a93626097304b5d84f78163e478c5e Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Mon, 23 Feb 2015 10:24:31 -0600 Subject: [PATCH 093/137] Added conj gemv objects for complex build. --- cmake/kernel.cmake | 268 +++++++++++++++++++++-------------- driver/level2/CMakeLists.txt | 39 +++-- kernel/CMakeLists.txt | 15 +- 3 files changed, 202 insertions(+), 120 deletions(-) diff --git a/cmake/kernel.cmake b/cmake/kernel.cmake index 211da229d..3a4d13837 100644 --- a/cmake/kernel.cmake +++ b/cmake/kernel.cmake @@ -1,110 +1,158 @@ -# helper functions for the kernel CMakeLists.txt - - -# Set the default filenames for L1 objects. Most of these will be overriden by the appropriate KERNEL file. -macro(SetDefaultL1) - set(SAMAXKERNEL amax.S) - set(DAMAXKERNEL amax.S) - set(QAMAXKERNEL amax.S) - set(CAMAXKERNEL zamax.S) - set(ZAMAXKERNEL zamax.S) - set(XAMAXKERNEL zamax.S) - set(SAMINKERNEL amin.S) - set(DAMINKERNEL amin.S) - set(QAMINKERNEL amin.S) - set(CAMINKERNEL zamin.S) - set(ZAMINKERNEL zamin.S) - set(XAMINKERNEL zamin.S) - set(SMAXKERNEL max.S) - set(DMAXKERNEL max.S) - set(QMAXKERNEL max.S) - set(SMINKERNEL min.S) - set(DMINKERNEL min.S) - set(QMINKERNEL min.S) - set(ISAMAXKERNEL iamax.S) - set(IDAMAXKERNEL iamax.S) - set(IQAMAXKERNEL iamax.S) - set(ICAMAXKERNEL izamax.S) - set(IZAMAXKERNEL izamax.S) - set(IXAMAXKERNEL izamax.S) - set(ISAMINKERNEL iamin.S) - set(IDAMINKERNEL iamin.S) - set(IQAMINKERNEL iamin.S) - set(ICAMINKERNEL izamin.S) - set(IZAMINKERNEL izamin.S) - set(IXAMINKERNEL izamin.S) - set(ISMAXKERNEL iamax.S) - set(IDMAXKERNEL iamax.S) - set(IQMAXKERNEL iamax.S) - set(ISMINKERNEL iamin.S) - set(IDMINKERNEL iamin.S) - set(IQMINKERNEL iamin.S) - set(SASUMKERNEL asum.S) - set(DASUMKERNEL asum.S) - set(CASUMKERNEL zasum.S) - set(ZASUMKERNEL zasum.S) - set(QASUMKERNEL asum.S) - set(XASUMKERNEL zasum.S) - set(SAXPYKERNEL axpy.S) - set(DAXPYKERNEL axpy.S) - set(CAXPYKERNEL zaxpy.S) - set(ZAXPYKERNEL zaxpy.S) - 
set(QAXPYKERNEL axpy.S) - set(XAXPYKERNEL zaxpy.S) - set(SCOPYKERNEL copy.S) - set(DCOPYKERNEL copy.S) - set(CCOPYKERNEL zcopy.S) - set(ZCOPYKERNEL zcopy.S) - set(QCOPYKERNEL copy.S) - set(XCOPYKERNEL zcopy.S) - set(SDOTKERNEL dot.S) - set(DDOTKERNEL dot.S) - set(CDOTKERNEL zdot.S) - set(ZDOTKERNEL zdot.S) - set(QDOTKERNEL dot.S) - set(XDOTKERNEL zdot.S) - set(SNRM2KERNEL nrm2.S) - set(DNRM2KERNEL nrm2.S) - set(QNRM2KERNEL nrm2.S) - set(CNRM2KERNEL znrm2.S) - set(ZNRM2KERNEL znrm2.S) - set(XNRM2KERNEL znrm2.S) - set(SROTKERNEL rot.S) - set(DROTKERNEL rot.S) - set(QROTKERNEL rot.S) - set(CROTKERNEL zrot.S) - set(ZROTKERNEL zrot.S) - set(XROTKERNEL zrot.S) - set(SSCALKERNEL scal.S) - set(DSCALKERNEL scal.S) - set(CSCALKERNEL zscal.S) - set(ZSCALKERNEL zscal.S) - set(QSCALKERNEL scal.S) - set(XSCALKERNEL zscal.S) - set(SSWAPKERNEL swap.S) - set(DSWAPKERNEL swap.S) - set(CSWAPKERNEL zswap.S) - set(ZSWAPKERNEL zswap.S) - set(QSWAPKERNEL swap.S) - set(XSWAPKERNEL zswap.S) - set(SGEMVNKERNEL gemv_n.S) - set(SGEMVTKERNEL gemv_t.S) - set(DGEMVNKERNEL gemv_n.S) - set(DGEMVTKERNEL gemv_t.S) - set(CGEMVNKERNEL zgemv_n.S) - set(CGEMVTKERNEL zgemv_t.S) - set(ZGEMVNKERNEL zgemv_n.S) - set(ZGEMVTKERNEL zgemv_t.S) - set(QGEMVNKERNEL gemv_n.S) - set(QGEMVTKERNEL gemv_t.S) - set(XGEMVNKERNEL zgemv_n.S) - set(XGEMVTKERNEL zgemv_t.S) - set(SCABS_KERNEL cabs.S) - set(DCABS_KERNEL cabs.S) - set(QCABS_KERNEL cabs.S) - set(LSAME_KERNEL lsame.S) - set(SAXPBYKERNEL ../arm/axpby.c) - set(DAXPBYKERNEL ../arm/axpby.c) - set(CAXPBYKERNEL ../arm/zaxpby.c) - set(ZAXPBYKERNEL ../arm/zaxpby.c) -endmacro () +# helper functions for the kernel CMakeLists.txt + + +# Set the default filenames for L1 objects. Most of these will be overriden by the appropriate KERNEL file. 
+macro(SetDefaultL1) + set(SAMAXKERNEL amax.S) + set(DAMAXKERNEL amax.S) + set(QAMAXKERNEL amax.S) + set(CAMAXKERNEL zamax.S) + set(ZAMAXKERNEL zamax.S) + set(XAMAXKERNEL zamax.S) + set(SAMINKERNEL amin.S) + set(DAMINKERNEL amin.S) + set(QAMINKERNEL amin.S) + set(CAMINKERNEL zamin.S) + set(ZAMINKERNEL zamin.S) + set(XAMINKERNEL zamin.S) + set(SMAXKERNEL max.S) + set(DMAXKERNEL max.S) + set(QMAXKERNEL max.S) + set(SMINKERNEL min.S) + set(DMINKERNEL min.S) + set(QMINKERNEL min.S) + set(ISAMAXKERNEL iamax.S) + set(IDAMAXKERNEL iamax.S) + set(IQAMAXKERNEL iamax.S) + set(ICAMAXKERNEL izamax.S) + set(IZAMAXKERNEL izamax.S) + set(IXAMAXKERNEL izamax.S) + set(ISAMINKERNEL iamin.S) + set(IDAMINKERNEL iamin.S) + set(IQAMINKERNEL iamin.S) + set(ICAMINKERNEL izamin.S) + set(IZAMINKERNEL izamin.S) + set(IXAMINKERNEL izamin.S) + set(ISMAXKERNEL iamax.S) + set(IDMAXKERNEL iamax.S) + set(IQMAXKERNEL iamax.S) + set(ISMINKERNEL iamin.S) + set(IDMINKERNEL iamin.S) + set(IQMINKERNEL iamin.S) + set(SASUMKERNEL asum.S) + set(DASUMKERNEL asum.S) + set(CASUMKERNEL zasum.S) + set(ZASUMKERNEL zasum.S) + set(QASUMKERNEL asum.S) + set(XASUMKERNEL zasum.S) + set(SAXPYKERNEL axpy.S) + set(DAXPYKERNEL axpy.S) + set(CAXPYKERNEL zaxpy.S) + set(ZAXPYKERNEL zaxpy.S) + set(QAXPYKERNEL axpy.S) + set(XAXPYKERNEL zaxpy.S) + set(SCOPYKERNEL copy.S) + set(DCOPYKERNEL copy.S) + set(CCOPYKERNEL zcopy.S) + set(ZCOPYKERNEL zcopy.S) + set(QCOPYKERNEL copy.S) + set(XCOPYKERNEL zcopy.S) + set(SDOTKERNEL dot.S) + set(DDOTKERNEL dot.S) + set(CDOTKERNEL zdot.S) + set(ZDOTKERNEL zdot.S) + set(QDOTKERNEL dot.S) + set(XDOTKERNEL zdot.S) + set(SNRM2KERNEL nrm2.S) + set(DNRM2KERNEL nrm2.S) + set(QNRM2KERNEL nrm2.S) + set(CNRM2KERNEL znrm2.S) + set(ZNRM2KERNEL znrm2.S) + set(XNRM2KERNEL znrm2.S) + set(SROTKERNEL rot.S) + set(DROTKERNEL rot.S) + set(QROTKERNEL rot.S) + set(CROTKERNEL zrot.S) + set(ZROTKERNEL zrot.S) + set(XROTKERNEL zrot.S) + set(SSCALKERNEL scal.S) + set(DSCALKERNEL scal.S) + set(CSCALKERNEL zscal.S) + 
set(ZSCALKERNEL zscal.S) + set(QSCALKERNEL scal.S) + set(XSCALKERNEL zscal.S) + set(SSWAPKERNEL swap.S) + set(DSWAPKERNEL swap.S) + set(CSWAPKERNEL zswap.S) + set(ZSWAPKERNEL zswap.S) + set(QSWAPKERNEL swap.S) + set(XSWAPKERNEL zswap.S) + set(SGEMVNKERNEL gemv_n.S) + set(SGEMVTKERNEL gemv_t.S) + set(DGEMVNKERNEL gemv_n.S) + set(DGEMVTKERNEL gemv_t.S) + set(CGEMVNKERNEL zgemv_n.S) + set(CGEMVTKERNEL zgemv_t.S) + set(ZGEMVNKERNEL zgemv_n.S) + set(ZGEMVTKERNEL zgemv_t.S) + set(QGEMVNKERNEL gemv_n.S) + set(QGEMVTKERNEL gemv_t.S) + set(XGEMVNKERNEL zgemv_n.S) + set(XGEMVTKERNEL zgemv_t.S) + set(SCABS_KERNEL cabs.S) + set(DCABS_KERNEL cabs.S) + set(QCABS_KERNEL cabs.S) + set(LSAME_KERNEL lsame.S) + set(SAXPBYKERNEL ../arm/axpby.c) + set(DAXPBYKERNEL ../arm/axpby.c) + set(CAXPBYKERNEL ../arm/zaxpby.c) + set(ZAXPBYKERNEL ../arm/zaxpby.c) +endmacro () + +macro(SetDefaultL2) + set(SGEMVNKERNEL gemv_n.S) + set(SGEMVTKERNEL gemv_t.S) + set(DGEMVNKERNEL gemv_n.S) + set(DGEMVTKERNEL gemv_t.S) + set(CGEMVNKERNEL zgemv_n.S) + set(CGEMVTKERNEL zgemv_t.S) + set(ZGEMVNKERNEL zgemv_n.S) + set(ZGEMVTKERNEL zgemv_t.S) + set(QGEMVNKERNEL gemv_n.S) + set(QGEMVTKERNEL gemv_t.S) + set(XGEMVNKERNEL zgemv_n.S) + set(XGEMVTKERNEL zgemv_t.S) + set(SGERKERNEL ../generic/ger.c) + set(DGERKERNEL ../generic/ger.c) + set(QGERKERNEL ../generic/ger.c) + set(CGERUKERNEL ../generic/zger.c) + set(CGERCKERNEL ../generic/zger.c) + set(ZGERUKERNEL ../generic/zger.c) + set(ZGERCKERNEL ../generic/zger.c) + set(XGERUKERNEL ../generic/zger.c) + set(XGERCKERNEL ../generic/zger.c) + set(SSYMV_U_KERNEL ../generic/symv_k.c) + set(SSYMV_L_KERNEL ../generic/symv_k.c) + set(DSYMV_U_KERNEL ../generic/symv_k.c) + set(DSYMV_L_KERNEL ../generic/symv_k.c) + set(QSYMV_U_KERNEL ../generic/symv_k.c) + set(QSYMV_L_KERNEL ../generic/symv_k.c) + set(CSYMV_U_KERNEL ../generic/zsymv_k.c) + set(CSYMV_L_KERNEL ../generic/zsymv_k.c) + set(ZSYMV_U_KERNEL ../generic/zsymv_k.c) + set(ZSYMV_L_KERNEL ../generic/zsymv_k.c) + 
set(XSYMV_U_KERNEL ../generic/zsymv_k.c) + set(XSYMV_L_KERNEL ../generic/zsymv_k.c) + set(CHEMV_U_KERNEL ../generic/zhemv_k.c) + set(CHEMV_L_KERNEL ../generic/zhemv_k.c) + set(CHEMV_V_KERNEL ../generic/zhemv_k.c) + set(CHEMV_M_KERNEL ../generic/zhemv_k.c) + set(ZHEMV_U_KERNEL ../generic/zhemv_k.c) + set(ZHEMV_L_KERNEL ../generic/zhemv_k.c) + set(ZHEMV_V_KERNEL ../generic/zhemv_k.c) + set(ZHEMV_M_KERNEL ../generic/zhemv_k.c) + set(XHEMV_U_KERNEL ../generic/zhemv_k.c) + set(XHEMV_L_KERNEL ../generic/zhemv_k.c) + set(XHEMV_V_KERNEL ../generic/zhemv_k.c) + set(XHEMV_M_KERNEL ../generic/zhemv_k.c) +endmacro () diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index 8b37917a6..54e0eb42f 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -54,14 +54,6 @@ endforeach () if (SMP) - # gbmv uses a lowercase n and t. N.B. this uses TRANSA where gbmv.c uses TRANS. Intentional? - GenerateNamedObjects("gbmv_thread.c" "" "gbmv_thread_n") - GenerateNamedObjects("gbmv_thread.c" "TRANSA" "gbmv_thread_t") - - GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n") - GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t") - - GenerateNamedObjects("ger_thread.c") set(UL_SMP_SOURCES symv_thread.c @@ -84,10 +76,41 @@ if (SMP) GenerateCombinationObjects("${NU_SMP_SOURCES}" "TRANSA;LOWER;UNIT" "N;U;N" "" 2) foreach (float_type ${FLOAT_TYPES}) + + GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false ${float_type}) + GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false ${float_type}) + + GenerateNamedObjects("gbmv_thread.c" "" "gbmv_thread_n" false "" "" false ${float_type}) + GenerateNamedObjects("gbmv_thread.c" "TRANSA" "gbmv_thread_t" false "" "" false ${float_type}) + + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") GenerateCombinationObjects("trmv_thread.c" "LOWER;UNIT" "U;N" "TRANSA=3" 0 "trmv_thread_R" false ${float_type}) 
GenerateCombinationObjects("trmv_thread.c" "LOWER;UNIT" "U;N" "TRANSA=4" 0 "trmv_thread_C" false ${float_type}) + + GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type}) + GenerateNamedObjects("gemv_thread.c" "CONJ;TRANSA" "gemv_thread_c" false "" "" false ${float_type}) + GenerateNamedObjects("gemv_thread.c" "XCONJ" "gemv_thread_o" false "" "" false ${float_type}) + GenerateNamedObjects("gemv_thread.c" "XCONJ;TRANSA" "gemv_thread_u" false "" "" false ${float_type}) + GenerateNamedObjects("gemv_thread.c" "XCONJ;CONJ" "gemv_thread_s" false "" "" false ${float_type}) + GenerateNamedObjects("gemv_thread.c" "XCONJ;CONJ;TRANSA" "gemv_thread_d" false "" "" false ${float_type}) + + GenerateNamedObjects("gbmv_thread.c" "CONJ" "gbmv_thread_r" false "" "" false ${float_type}) + GenerateNamedObjects("gbmv_thread.c" "CONJ;TRANSA" "gbmv_thread_c" false "" "" false ${float_type}) + GenerateNamedObjects("gbmv_thread.c" "XCONJ" "gbmv_thread_o" false "" "" false ${float_type}) + GenerateNamedObjects("gbmv_thread.c" "XCONJ;TRANSA" "gbmv_thread_u" false "" "" false ${float_type}) + GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ" "gbmv_thread_s" false "" "" false ${float_type}) + GenerateNamedObjects("gbmv_thread.c" "XCONJ;CONJ;TRANSA" "gbmv_thread_d" false "" "" false ${float_type}) + + GenerateNamedObjects("ger_thread.c" "" "ger_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("ger_thread.c" "CONJ" "ger_thread_C" false "" "" false ${float_type}) + GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type}) + + else () + GenerateNamedObjects("ger_thread.c" "" "" false "" "" false ${float_type}) endif () + endforeach () endif () diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 656090cf8..cacd0f38f 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -67,8 +67,7 @@ 
foreach (float_type ${FLOAT_TYPES}) endforeach () # Makefile.L2 -GenerateNamedObjects("${KERNELDIR}/gemv_n.S" "DOUBLE") -GenerateNamedObjects("${KERNELDIR}/gemv_t.S" "TRANS") +SetDefaultL2() GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) foreach (float_type ${FLOAT_TYPES}) @@ -78,6 +77,18 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ" "gerc_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "XCONJ" "gerv_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ;XCONJ" "gerd_k" false "" "" false ${float_type}) + + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "CONJ;TRANSA" "${KERNELDIR}/${${float_char}GEMVUKERNEL}" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type}) + else () + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) endif () 
endforeach () From 12d1fb2e4076bd6e8631cb0c2a5e941f038334e1 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 24 Feb 2015 10:30:16 -0600 Subject: [PATCH 094/137] Fixed incorrect object name in kernel CMakeLists.txt --- kernel/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index cacd0f38f..939eef6c7 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -81,7 +81,7 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "CONJ;TRANSA" "${KERNELDIR}/${${float_char}GEMVUKERNEL}" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type}) From 0d8e227ea76514ce18db97a544197ec5c49d0c96 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 24 Feb 2015 12:26:33 -0600 Subject: [PATCH 095/137] Changed strategy for setting preprocessor definitions. Instead of generating separate object files for each permutation of defines for a source file, GenerateNamedObjects now writes an entirely new source file and inserts the defines as #define c statements. 
This solves a problem I ran into with ar.exe where it was refusing to link objects that had the same filename despite having different paths. --- CMakeLists.txt | 16 ++++------------ cmake/utils.cmake | 29 ++++++++++++++++++++--------- driver/level2/CMakeLists.txt | 3 +-- driver/level3/CMakeLists.txt | 3 +-- driver/others/CMakeLists.txt | 11 +---------- interface/CMakeLists.txt | 3 +-- kernel/CMakeLists.txt | 2 +- lapack/CMakeLists.txt | 2 +- 8 files changed, 30 insertions(+), 39 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 85b20b176..530f3dda3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,24 +75,16 @@ if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.") endif () -# Let CMake handle this -#if (${NOFORTRAN}) -# message(ERROR "OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.") -#endif () - if (${NO_STATIC} AND ${NO_SHARED}) message(FATAL_ERROR "Neither static nor shared are enabled.") endif () -set(DBLAS_OBJS "") -foreach (SUBDIR ${SUBDIRS}) - add_subdirectory(${SUBDIR}) -endforeach () - # get obj vars into format that add_library likes: $ (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html) set(TARGET_OBJS "") -foreach (DBLAS_OBJ ${DBLAS_OBJS}) - list(APPEND TARGET_OBJS "$") +foreach (SUBDIR ${SUBDIRS}) + add_subdirectory(${SUBDIR}) + string(REPLACE "/" "_" subdir_obj ${SUBDIR}) + list(APPEND TARGET_OBJS "$") endforeach () # netlib: diff --git a/cmake/utils.cmake b/cmake/utils.cmake index edf25cdb2..498c3840a 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -13,7 +13,6 @@ function(ParseGetArchVars GETARCH_IN) endfunction () # Reads a Makefile into CMake vars. -# TODO: respect IFDEF/IFNDEF? 
macro(ParseMakefileVars MAKEFILE_IN) message(STATUS "Reading vars from ${MAKEFILE_IN}...") file(STRINGS ${MAKEFILE_IN} makefile_contents) @@ -215,16 +214,30 @@ function(GenerateNamedObjects sources_in) endif () endif () - add_library(${obj_name} OBJECT ${source_file}) - set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${obj_defines}") + if (VERBOSE_GEN) + message(STATUS "${obj_name}:${source_file}") + message(STATUS "${obj_defines}") + endif () - list(APPEND OBJ_LIST_OUT ${obj_name}) + # create a copy of the source to avoid duplicate obj filename problem with ar.exe + get_filename_component(source_extension ${source_file} EXT) + set(new_source_file "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${obj_name}${source_extension}") + if (IS_ABSOLUTE ${source_file}) + set(old_source_file ${source_file}) + else () + set(old_source_file "${CMAKE_CURRENT_LIST_DIR}/${source_file}") + endif () + + string(REPLACE ";" "\n#define " define_source "${obj_defines}") + string(REPLACE "=" " " define_source "${define_source}") + file(WRITE ${new_source_file} "#define ${define_source}\n#include \"${old_source_file}\"") + list(APPEND SRC_LIST_OUT ${new_source_file}) endforeach () endforeach () - list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT}) - set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) + list(APPEND OPENBLAS_SRC ${SRC_LIST_OUT}) + set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE) endfunction () # generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in @@ -260,7 +273,6 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_de set(define_combos ${LIST_OUT}) set(define_codes ${CODES_OUT}) - set(COMBO_OBJ_LIST_OUT "") list(LENGTH define_combos num_combos) math(EXPR num_combos "${num_combos} - 1") @@ -322,10 +334,9 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_de endif () GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" false 
"${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}") - list(APPEND COMBO_OBJ_LIST_OUT "${OBJ_LIST_OUT}") endforeach () endforeach () - set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) + set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE) endfunction () diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index 54e0eb42f..be275724f 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -115,5 +115,4 @@ if (SMP) endif () -set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS - +add_library(driver_level2 OBJECT ${OPENBLAS_SRC}) diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 85bde071d..fac96cc82 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -69,5 +69,4 @@ endforeach () #endif # -set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS - +add_library(driver_level3 OBJECT ${OPENBLAS_SRC}) diff --git a/driver/others/CMakeLists.txt b/driver/others/CMakeLists.txt index 3e17ce5be..938f1daaf 100644 --- a/driver/others/CMakeLists.txt +++ b/driver/others/CMakeLists.txt @@ -62,14 +62,6 @@ endif () #COMMONOBJS += profile.$(SUFFIX) #endif -add_library(COMMON_OBJS OBJECT - ${MEMORY} - ${SMP_SOURCES} - ${COMMON_SOURCES} -) - -list(APPEND DBLAS_OBJS "COMMON_OBJS") - #LIBOTHERS = libothers.$(LIBSUFFIX) #ifeq ($(DYNAMIC_ARCH), 1) @@ -78,5 +70,4 @@ list(APPEND DBLAS_OBJS "COMMON_OBJS") #HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) #endif -set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS - +add_library(driver_others OBJECT ${OPENBLAS_SRC} ${MEMORY} ${SMP_SOURCES} ${COMMON_SOURCES}) diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 1ca554307..ae949235b 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -117,5 +117,4 @@ if (NOT DEFINED NO_LAPACK) 
GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3) endif () -set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS - +add_library(interface OBJECT ${OPENBLAS_SRC}) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 939eef6c7..368af90b2 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -251,4 +251,4 @@ endforeach () # Makefile.LA #DBLASOBJS += dneg_tcopy$(TSUFFIX).$(SUFFIX) dlaswp_ncopy$(TSUFFIX).$(SUFFIX) -set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS +add_library(kernel OBJECT ${OPENBLAS_SRC}) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index 7e17de7de..c8c82219d 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -99,5 +99,5 @@ GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4 "" "" 3) GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "" 4) GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0 "" "" 3) -set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS +add_library(lapack OBJECT ${OPENBLAS_SRC}) From 2416d9dbac3dda3ce54cc659da18e4eb9b764989 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 24 Feb 2015 13:18:07 -0600 Subject: [PATCH 096/137] Fixed TRANSA defines for complex sources in driver/level2. 
--- driver/level2/CMakeLists.txt | 110 ++++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 48 deletions(-) diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index be275724f..d6179c0fb 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -12,12 +12,17 @@ set(UL_SOURCES ) # sources that need to be compiled several times, for UNIT, TRANSA -set(NU_SOURCES +set(U_SOURCES + trmv_U.c tbmv_U.c tbsv_U.c tpmv_U.c tpsv_U.c trsv_U.c +) + +set(L_SOURCES + trmv_L.c tbmv_L.c tbsv_L.c tpmv_L.c @@ -25,66 +30,60 @@ set(NU_SOURCES trsv_L.c ) +set(UL_SMP_SOURCES + symv_thread.c + syr_thread.c + syr2_thread.c + spr_thread.c + spr2_thread.c + spmv_thread.c + sbmv_thread.c +) + +set(NU_SMP_SOURCES + trmv_thread.c + tpmv_thread.c + tbmv_thread.c +) + # objects that need LOWER set GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3) -# objects that need TRANSA and UNIT set -# N.B. BLAS wants to put the U/L from the filename in the *MIDDLE* because of course why not have a different naming scheme for every single object -hpa -GenerateCombinationObjects("${NU_SOURCES}" "TRANSA;UNIT" "N;N" "" 3 "" "" 3) - -# gbmv uses a lowercase n and t. WHY? WHO KNOWS! 
+# gbmv uses a lowercase n and t GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3) GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3) # special defines for complex trmv foreach (float_type ${FLOAT_TYPES}) - if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") - GenerateCombinationObjects("ztrmv_U.c" "UNIT" "N" "TRANSA=1" 0 "trmv_NU" false ${float_type}) - GenerateCombinationObjects("ztrmv_U.c" "UNIT" "N" "TRANSA=2" 0 "trmv_TL" false ${float_type}) - GenerateCombinationObjects("ztrmv_U.c" "UNIT" "N" "TRANSA=3" 0 "trmv_RU" false ${float_type}) - GenerateCombinationObjects("ztrmv_U.c" "UNIT" "N" "TRANSA=4" 0 "trmv_CL" false ${float_type}) - GenerateCombinationObjects("ztrmv_L.c" "UNIT" "N" "TRANSA=1" 0 "trmv_NL" false ${float_type}) - GenerateCombinationObjects("ztrmv_L.c" "UNIT" "N" "TRANSA=2" 0 "trmv_TU" false ${float_type}) - GenerateCombinationObjects("ztrmv_L.c" "UNIT" "N" "TRANSA=3" 0 "trmv_RL" false ${float_type}) - GenerateCombinationObjects("ztrmv_L.c" "UNIT" "N" "TRANSA=4" 0 "trmv_CU" false ${float_type}) - else () - GenerateCombinationObjects("trmv_U.c;trmv_L.c" "TRANSA;UNIT" "N;N" "" 3 "" false ${float_type}) - endif () -endforeach () - -if (SMP) - - - set(UL_SMP_SOURCES - symv_thread.c - syr_thread.c - syr2_thread.c - spr_thread.c - spr2_thread.c - spmv_thread.c - sbmv_thread.c - ) - - GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2) - - set(NU_SMP_SOURCES - trmv_thread.c - tpmv_thread.c - tbmv_thread.c - ) - - GenerateCombinationObjects("${NU_SMP_SOURCES}" "TRANSA;LOWER;UNIT" "N;U;N" "" 2) - - foreach (float_type ${FLOAT_TYPES}) + if (SMP) GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false ${float_type}) GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false ${float_type}) GenerateNamedObjects("gbmv_thread.c" "" "gbmv_thread_n" false "" "" false ${float_type}) GenerateNamedObjects("gbmv_thread.c" "TRANSA" "gbmv_thread_t" false "" "" false 
${float_type}) + endif () + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + + foreach (u_source ${U_SOURCES}) + string(REGEX MATCH "[a-z]+" op_name ${u_source}) + GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NU" false ${float_type}) + GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TL" false ${float_type}) + GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RU" false ${float_type}) + GenerateCombinationObjects("z${u_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CL" false ${float_type}) + endforeach () + + foreach (l_source ${L_SOURCES}) + string(REGEX MATCH "[a-z]+" op_name ${l_source}) + GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=1" 0 "${op_name}_NL" false ${float_type}) + GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=2" 0 "${op_name}_TU" false ${float_type}) + GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=3" 0 "${op_name}_RL" false ${float_type}) + GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type}) + endforeach () + + if (SMP) - if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") GenerateCombinationObjects("trmv_thread.c" "LOWER;UNIT" "U;N" "TRANSA=3" 0 "trmv_thread_R" false ${float_type}) GenerateCombinationObjects("trmv_thread.c" "LOWER;UNIT" "U;N" "TRANSA=4" 0 "trmv_thread_C" false ${float_type}) @@ -107,12 +106,27 @@ if (SMP) GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type}) GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type}) - else () - GenerateNamedObjects("ger_thread.c" "" "" false "" "" false ${float_type}) + foreach (nu_smp_src ${NU_SMP_SOURCSE}) + string(REGEX MATCH "[a-z]+" op_name ${nu_smp_src}) + GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type}) + 
GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=2" 0 "${op_name}_T" false ${float_type}) + GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=3" 0 "${op_name}_R" false ${float_type}) + GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=4" 0 "${op_name}_C" false ${float_type}) + endforeach () endif () - endforeach () + else () + # N.B. BLAS wants to put the U/L from the filename in the *MIDDLE* + GenerateCombinationObjects("${U_SOURCES};${L_SOURCES}" "TRANSA;UNIT" "N;N" "" 3 "" false ${float_type}) + if (SMP) + GenerateNamedObjects("ger_thread.c" "" "" false "" "" false ${float_type}) + GenerateCombinationObjects("${NU_SMP_SOURCES}" "TRANSA;LOWER;UNIT" "N;U;N" "" 2 "" false ${float_type}) + endif () + endif () +endforeach () +if (SMP) + GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2) endif () add_library(driver_level2 OBJECT ${OPENBLAS_SRC}) From 0553476fbaf668679c45b9f65c29249f97f2bdae Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 24 Feb 2015 14:30:35 -0600 Subject: [PATCH 097/137] Added TRANS defines for complex sources in lapack. 
--- driver/level2/CMakeLists.txt | 2 +- lapack/CMakeLists.txt | 35 +++++++++++++++-------------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index d6179c0fb..d35069cf9 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -53,7 +53,7 @@ GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3) GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3) GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3) -# special defines for complex trmv +# special defines for complex foreach (float_type ${FLOAT_TYPES}) if (SMP) diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt index c8c82219d..de42e1ab6 100644 --- a/lapack/CMakeLists.txt +++ b/lapack/CMakeLists.txt @@ -38,25 +38,6 @@ set(UNIT_SOURCES2 trti2/trti2_L.c ) -# TODO: getrs needs to be compiled with up to TRANS=4 in the complex case -set(ZLAPACK_SOURCES - getf2/zgetf2_k.c - getrf/getrf_single.c - getrs/zgetrs_single.c - potrf/potrf_U_single.c - potrf/potrf_L_single.c - potf2/potf2_U.c - potf2/potf2_L.c - lauu2/zlauu2_U.c - lauu2/zlauu2_L.c - lauum/lauum_U_single.c - lauum/lauum_L_single.c - trti2/ztrti2_U.c - trti2/ztrti2_L.c - trtri/trtri_U_single.c - trtri/trtri_L_single.c -) - GenerateNamedObjects("${LAPACK_SOURCES}") GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" false "" "" false 3) @@ -95,7 +76,21 @@ if (SMP) GenerateNamedObjects("${PARALLEL_SOURCES}") endif () -GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4 "" "" 3) +foreach (float_type ${FLOAT_TYPES}) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + foreach (trans_src ${TRANS_SOURCES}) + string(REGEX MATCH "[a-z]/([a-z]+_)([a-z]+)" op_name ${trans_src}) + string(REPLACE "/" "/z" ztrans_src ${trans_src}) + GenerateNamedObjects("${ztrans_src}" "TRANS=1" "${CMAKE_MATCH_1}N_${CMAKE_MATCH_2}" false "" "" false ${float_type}) + GenerateNamedObjects("${ztrans_src}" 
"TRANS=2" "${CMAKE_MATCH_1}T_${CMAKE_MATCH_2}" false "" "" false ${float_type}) + GenerateNamedObjects("${ztrans_src}" "TRANS=3" "${CMAKE_MATCH_1}R_${CMAKE_MATCH_2}" false "" "" false ${float_type}) + GenerateNamedObjects("${ztrans_src}" "TRANS=4" "${CMAKE_MATCH_1}C_${CMAKE_MATCH_2}" false "" "" false ${float_type}) + endforeach () + else () + GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4 "" false ${float_type}) + endif () +endforeach () + GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "" 4) GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0 "" "" 3) From a8002b0c5f961856c0f8d8c924418d37474a0032 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 24 Feb 2015 14:31:18 -0600 Subject: [PATCH 098/137] Separated getarch ASM file when using MSVC. --- cmake/prebuild.cmake | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 32faeeea7..d2bada364 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -54,11 +54,20 @@ include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake") include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") # compile getarch +set(GETARCH_SRC + ${CMAKE_SOURCE_DIR}/getarch.c + ${CPUIDEMO} +) + +if (NOT MSVC) + list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S) +endif () + set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") file(MAKE_DIRECTORY ${GETARCH_DIR}) try_compile(GETARCH_RESULT ${GETARCH_DIR} - SOURCES ${CMAKE_SOURCE_DIR}/getarch.c ${CMAKE_SOURCE_DIR}/cpuid.S ${CPUIDEMO} + SOURCES ${GETARCH_SRC} COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE GETARCH_LOG COPY_FILE ${GETARCH_BIN} From 504cdb10ed0eab06662be1179b0be141764ee17c Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 24 Feb 2015 14:31:45 -0600 Subject: [PATCH 099/137] Added check for MSVC before enabling fortran. Currently forcing gfortran, instead of assuming ifort. 
--- CMakeLists.txt | 1 - cmake/f_check.cmake | 7 +++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 530f3dda3..64d27da42 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,7 +9,6 @@ set(OpenBLAS_MINOR_VERSION 2) set(OpenBLAS_PATCH_VERSION 13) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") -enable_language(Fortran) enable_language(ASM) message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.") diff --git a/cmake/f_check.cmake b/cmake/f_check.cmake index 266cdbb2a..f7651db56 100644 --- a/cmake/f_check.cmake +++ b/cmake/f_check.cmake @@ -20,6 +20,13 @@ # NEEDBUNDERSCORE # NEED2UNDERSCORES +if (MSVC) + # had to do this for MSVC, else CMake automatically assumes I have ifort... -hpa + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER(gfortran GNU) +endif () +enable_language(Fortran) + if (NOT ONLY_CBLAS) # N.B. f_check is not cross-platform, so instead try to use CMake variables # run f_check (appends to TARGET files) From ab7043373fa1b6993439ca1b14f14bb8967198dd Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Tue, 24 Feb 2015 15:18:16 -0600 Subject: [PATCH 100/137] Fixed bug generating trmv complex source names. 
--- driver/level2/CMakeLists.txt | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index d35069cf9..e4440be6d 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -84,9 +84,6 @@ foreach (float_type ${FLOAT_TYPES}) if (SMP) - GenerateCombinationObjects("trmv_thread.c" "LOWER;UNIT" "U;N" "TRANSA=3" 0 "trmv_thread_R" false ${float_type}) - GenerateCombinationObjects("trmv_thread.c" "LOWER;UNIT" "U;N" "TRANSA=4" 0 "trmv_thread_C" false ${float_type}) - GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type}) GenerateNamedObjects("gemv_thread.c" "CONJ;TRANSA" "gemv_thread_c" false "" "" false ${float_type}) GenerateNamedObjects("gemv_thread.c" "XCONJ" "gemv_thread_o" false "" "" false ${float_type}) @@ -106,8 +103,8 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type}) GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type}) - foreach (nu_smp_src ${NU_SMP_SOURCSE}) - string(REGEX MATCH "[a-z]+" op_name ${nu_smp_src}) + foreach (nu_smp_src ${NU_SMP_SOURCES}) + string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src}) GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type}) GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=2" 0 "${op_name}_T" false ${float_type}) GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=3" 0 "${op_name}_R" false ${float_type}) From 9eaea02f33a52443814be07ed533420715f57698 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 25 Feb 2015 09:39:11 -0600 Subject: [PATCH 101/137] Added additional gemm defines for complex types. 
--- driver/level3/CMakeLists.txt | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index fac96cc82..376a0beeb 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -12,6 +12,7 @@ endif () # loop through gemm.c defines set(GEMM_DEFINES NN NT TN TT) +set(GEMM_COMPLEX_DEFINES RN CN RT CT NR TR RR CR NC TC RC CC) foreach (GEMM_DEFINE ${GEMM_DEFINES}) string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0) @@ -53,6 +54,14 @@ foreach (float_type ${FLOAT_TYPES}) GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type}) GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type}) GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type}) + # special gemm defines for complex + foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) + string(TOLOWER ${gemm_define} gemm_define_LC) + GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type}) + if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) + GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type}) + endif () + endforeach () endif () endforeach () From 00e373aea6c4f8f11797d230bba0ce49f573b191 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 25 Feb 2015 10:18:18 -0600 Subject: [PATCH 102/137] Added LAPACK sources directly to add_library call instead of OBJECT. --- CMakeLists.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 64d27da42..cbc4fb9a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,12 +91,10 @@ endforeach () # Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. 
We just want to build a couple of lib files out of lapack and lapacke. # Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake") -add_library(LA_OBJ OBJECT ${LA_SOURCES}) -set_target_properties(LA_OBJ PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}") -list(APPEND TARGET_OBJS "$") +set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}") # add objects to the openblas lib -add_library(openblas ${TARGET_OBJS}) +add_library(openblas ${LA_SOURCES} ${TARGET_OBJS}) # TODO: Why is the config saved here? Is this necessary with CMake? #Save the config files for installation From 518e2424a8b14f314a850b297c867f9f18bd622e Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 25 Feb 2015 11:51:29 -0600 Subject: [PATCH 103/137] Fixed bad filename for cpuid.S compile. --- kernel/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 368af90b2..4fe27a7d0 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -20,7 +20,7 @@ ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") ParseMakefileVars("${KERNELDIR}/KERNEL") if (${ARCH} STREQUAL "x86") - GenerateNamedObjects("${KERNELDIR}/cpuid.S" "") + GenerateNamedObjects("${KERNELDIR}/cpuid.S" "" "" false "" "" true) endif () # don't use float type name mangling here From 84d90d6ed85853eecb3ea17f1f23d3a5d7e8d264 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 25 Feb 2015 11:52:25 -0600 Subject: [PATCH 104/137] Fixed some compiler errors/warnings for clang. 
--- common.h | 2 +- cpuid_x86.c | 4 ++-- driver/others/memory.c | 4 ++-- kernel/arm/zdot.c | 6 +++++- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/common.h b/common.h index 0761f5536..0a7bb66bc 100644 --- a/common.h +++ b/common.h @@ -310,7 +310,7 @@ typedef int blasint; #endif #if defined(OS_WINDOWS) -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) #define YIELDING YieldProcessor() #else #define YIELDING SwitchToThread() diff --git a/cpuid_x86.c b/cpuid_x86.c index 6b7e408d8..8a8a802a0 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -40,7 +40,7 @@ #include #include "cpuid.h" -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) #define C_INLINE __inline #else #define C_INLINE inline @@ -154,7 +154,7 @@ static C_INLINE int have_excpuid(void){ #ifndef NO_AVX static C_INLINE void xgetbv(int op, int * eax, int * edx){ //Use binary code for xgetbv -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) *eax = __xgetbv(op); #else __asm__ __volatile__ diff --git a/driver/others/memory.c b/driver/others/memory.c index 16d68cced..fa364785b 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -769,12 +769,12 @@ static void *alloc_hugetlb(void *address){ if (LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &tp.Privileges[0].Luid) != TRUE) { CloseHandle(hToken); - return -1; + return (void*)-1; } if (AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL) != TRUE) { CloseHandle(hToken); - return -1; + return (void*)-1; } map_address = (void *)VirtualAlloc(address, diff --git a/kernel/arm/zdot.c b/kernel/arm/zdot.c index 469487531..198104022 100644 --- a/kernel/arm/zdot.c +++ b/kernel/arm/zdot.c @@ -35,9 +35,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
**************************************************************************************/ #include "common.h" -#include +#ifndef _MSC_VER +#include FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#else +openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#endif { BLASLONG i=0; BLASLONG ix=0,iy=0; From 5ae8993752886033161ef74184f333a2401c8ba9 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 25 Feb 2015 11:52:51 -0600 Subject: [PATCH 105/137] Added intrinsics for MSVC. --- common_x86.h | 29 +++++++++++++++++++++++++++++ driver/others/memory.c | 27 +++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/common_x86.h b/common_x86.h index f096e9074..0cb242c4e 100644 --- a/common_x86.h +++ b/common_x86.h @@ -56,41 +56,65 @@ static void __inline blas_lock(volatile BLASULONG *address){ do { while (*address) {YIELDING;}; +#if defined(_MSC_VER) && !defined(__clang__) + // use intrinsic instead of inline assembly + ret = _InterlockedExchange(address, 1); + // inline assembly + /*__asm { + mov eax, address + mov ebx, 1 + xchg [eax], ebx + mov ret, ebx + }*/ +#else __asm__ __volatile__( "xchgl %0, %1\n" : "=r"(ret), "=m"(*address) : "0"(1), "m"(*address) : "memory"); +#endif } while (ret); } static __inline unsigned long long rpcc(void){ +#if defined(_MSC_VER) && !defined(__clang__) + return __rdtsc(); // use MSVC intrinsic +#else unsigned int a, d; __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); return ((unsigned long long)a + ((unsigned long long)d << 32)); +#endif }; static __inline unsigned long getstackaddr(void){ +#if defined(_MSC_VER) && !defined(__clang__) + return (unsigned long)_ReturnAddress(); // use MSVC intrinsic +#else unsigned long addr; __asm__ __volatile__ ("mov %%esp, %0" : "=r"(addr) : : "memory"); return addr; +#endif }; static __inline long double sqrt_long(long double val) { +#if defined(_MSC_VER) && !defined(__clang__) + return sqrt(val); // not 
sure if this will use fsqrt +#else long double result; __asm__ __volatile__ ("fldt %1\n" "fsqrt\n" "fstpt %0\n" : "=m" (result) : "m"(val)); return result; +#endif } #define SQRT(a) sqrt_long(a) @@ -146,9 +170,14 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ y = blas_quick_divide_table[y]; +#if defined(_MSC_VER) && !defined(__clang__) + (void*)result; + return x*y; +#else __asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y)); return result; +#endif } #endif diff --git a/driver/others/memory.c b/driver/others/memory.c index fa364785b..70bfa7a57 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -137,8 +137,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define BITMASK(a, b, c) ((((a) >> (b)) & (c))) +#if defined(_MSC_VER) && !defined(__clang__) +#define CONSTRUCTOR __cdecl +#define DESTRUCTOR __cdecl +#else #define CONSTRUCTOR __attribute__ ((constructor)) #define DESTRUCTOR __attribute__ ((destructor)) +#endif #ifdef DYNAMIC_ARCH gotoblas_t *gotoblas = NULL; @@ -1360,6 +1365,28 @@ void DESTRUCTOR gotoblas_quit(void) { blas_shutdown(); } +#if defined(_MSC_VER) && !defined(__clang__) +BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) +{ + switch (ul_reason_for_call) + { + case DLL_PROCESS_ATTACH: + gotoblas_init(); + break; + case DLL_THREAD_ATTACH: + break; + case DLL_THREAD_DETACH: + break; + case DLL_PROCESS_DETACH: + gotoblas_quit(); + break; + default: + break; + } + return TRUE; +} +#endif + #if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64)) /* Don't call me; this is just work around for PGI / Sun bug */ void gotoblas_dummy_for_PGI(void) { From 3649cfbd7be57028c83eea956f3c13d5fb403756 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 25 Feb 2015 12:23:26 -0600 Subject: [PATCH 106/137] Fixed EPILOGUE for clang. 
--- common_x86.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/common_x86.h b/common_x86.h index 0cb242c4e..9d2df41b8 100644 --- a/common_x86.h +++ b/common_x86.h @@ -313,8 +313,12 @@ REALNAME: #define PROFCODE +#ifdef __clang__ +#define EPILOGUE .end +#else #define EPILOGUE .end REALNAME #endif +#endif #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) #define PROLOGUE \ From e19bf3a28bf13d9c0e1306ba07c41aa167561579 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 25 Feb 2015 14:44:49 -0600 Subject: [PATCH 107/137] Removed MSVC cpuid func when using clang. --- cpuid_x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpuid_x86.c b/cpuid_x86.c index 8a8a802a0..c85e9424d 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -59,7 +59,7 @@ #endif */ -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) { From 1d183dcda8332ecadbd4aa0c59232d14651ffe52 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Wed, 25 Feb 2015 16:51:08 -0600 Subject: [PATCH 108/137] Added lapacke sources. --- CMakeLists.txt | 10 +++++++--- cmake/lapack.cmake | 2 +- cmake/prebuild.cmake | 5 ++++- cmake/system.cmake | 10 ++++++---- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cbc4fb9a1..5c2681141 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -90,11 +90,15 @@ endforeach () # Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. # Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. 
-include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake") -set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}") +if (NOT NOFORTRAN) + include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake") +endif () +if (NOT NO_LAPACKE) + include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake") +endif () # add objects to the openblas lib -add_library(openblas ${LA_SOURCES} ${TARGET_OBJS}) +add_library(openblas ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS}) # TODO: Why is the config saved here? Is this necessary with CMake? #Save the config files for installation diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index e8d19f10d..3e81611ab 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -344,4 +344,4 @@ set(LA_SOURCES "") foreach (LA_FILE ${LA_REL_SRC}) list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}") endforeach () - +set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}") diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index d2bada364..3e2574f77 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -51,7 +51,10 @@ else() endif () include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake") -include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") + +if (NOT NOFORTRAN) + include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") +endif () # compile getarch set(GETARCH_SRC diff --git a/cmake/system.cmake b/cmake/system.cmake index cc7373e47..36f9b7cbd 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -132,8 +132,10 @@ include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake") # C Compiler dependent settings include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake") -# Fortran Compiler dependent settings -include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake") +if (NOT NOFORTRAN) + # Fortran Compiler dependent settings + include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake") +endif () if (BINARY64) if (INTERFACE64) @@ -342,7 +344,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") endforeach () endif () -if (${F_COMPILER} STREQUAL "GFORTRAN") +if 
("${F_COMPILER}" STREQUAL "GFORTRAN") # lapack-netlib is rife with uninitialized warnings -hpa set(LAPACK_FFLAGS "${LAPACK_FFLAGS} -Wno-maybe-uninitialized") endif () @@ -356,7 +358,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows") set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DOPENBLAS_OS_WINDOWS") endif () -if (${CMAKE_C_COMPILER} STREQUAL "LSB") +if (${CMAKE_C_COMPILER} STREQUAL "LSB" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Windows") set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE") endif () From 7ac7e147d4d9961d9082338efe7d6c6e6aea9a29 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 4 Aug 2015 04:37:05 +0800 Subject: [PATCH 109/137] Fixed cmake building bugs on Linux. Disable LAPACK by default. --- CMakeLists.txt | 42 ++++++++++++++++++++++++++++++++++++++---- cmake/prebuild.cmake | 10 +++++----- kernel/CMakeLists.txt | 4 ++-- kernel/x86_64/KERNEL | 4 ++-- 4 files changed, 47 insertions(+), 13 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c2681141..1d2e5d3c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,10 +6,29 @@ cmake_minimum_required(VERSION 2.8.4) project(OpenBLAS) set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MINOR_VERSION 2) -set(OpenBLAS_PATCH_VERSION 13) +set(OpenBLAS_PATCH_VERSION 14) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") enable_language(ASM) +enable_language(C) + +set(OpenBLAS_LIBNAME openblas) + +####### +option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS and CBLAS)" ON) +option(BUILD_DEBUG "Build Debug Version" OFF) +####### +if(BUILD_WITHOUT_LAPACK) +set(NO_LAPACK 1) +endif() + +if(BUILD_DEBUG) +set(CMAKE_BUILD_TYPE Debug) +else() +set(CMAKE_BUILD_TYPE Release) +endif() +####### + message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. 
Only x86 support is currently available.") @@ -32,6 +51,7 @@ endif () set(SUBDIRS ${BLASDIRS}) if (NOT NO_LAPACK) + message ("error 1") list(APPEND SUBDIRS lapack) endif () @@ -90,15 +110,29 @@ endforeach () # Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. # Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. -if (NOT NOFORTRAN) +if (NOT NOFORTRAN AND NOT NO_LAPACK) + message ("error 2") include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake") -endif () if (NOT NO_LAPACKE) include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake") endif () +endif () # add objects to the openblas lib -add_library(openblas ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS}) +add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS}) + +#only build shared library for MSVC +if(NOT MSVC) +add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS}) +set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME}) +set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1) +endif() + +set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES + VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION} + SOVERSION ${OpenBLAS_MAJOR_VERSION} +) + # TODO: Why is the config saved here? Is this necessary with CMake? 
#Save the config files for installation diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 3e2574f77..901c237c4 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -79,10 +79,10 @@ try_compile(GETARCH_RESULT ${GETARCH_DIR} message(STATUS "Running getarch") # use the cmake binary w/ the -E param to run a shell command in a cross-platform way -execute_process(COMMAND ${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT) -execute_process(COMMAND ${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT) +execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT) +execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT) -#message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}") +message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}") # append config data from getarch to the TARGET file and read in CMake vars file(APPEND ${TARGET_CONF} ${GETARCH_CONF_OUT}) @@ -99,8 +99,8 @@ try_compile(GETARCH2_RESULT ${GETARCH2_DIR} ) # use the cmake binary w/ the -E param to run a shell command in a cross-platform way -execute_process(COMMAND ${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT) -execute_process(COMMAND ${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT) +execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT) +execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT) # append config data from getarch_2nd to the TARGET file and read in CMake vars file(APPEND ${TARGET_CONF} ${GETARCH2_CONF_OUT}) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 4fe27a7d0..cd71101a5 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -16,8 +16,9 @@ else () endif () SetDefaultL1() -ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") +SetDefaultL2() ParseMakefileVars("${KERNELDIR}/KERNEL") +ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") if (${ARCH} STREQUAL "x86") 
GenerateNamedObjects("${KERNELDIR}/cpuid.S" "" "" false "" "" true) @@ -67,7 +68,6 @@ foreach (float_type ${FLOAT_TYPES}) endforeach () # Makefile.L2 -SetDefaultL2() GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) foreach (float_type ${FLOAT_TYPES}) diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index 3508753ee..02e5a6047 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -119,11 +119,11 @@ XCOPYKERNEL = zcopy.S endif ifndef SDOTKERNEL -SDOTKERNEL = ../generic/dot.c +SDOTKERNEL = ../generic/dot.c endif ifndef DSDOTKERNEL -DSDOTKERNEL = ../generic/dot.c +DSDOTKERNEL = ../generic/dot.c endif ifndef DDOTKERNEL From 19664f3ef414e2ba23e55db9f6740c4fba0a35d7 Mon Sep 17 00:00:00 2001 From: Hank Anderson Date: Thu, 6 Aug 2015 07:40:06 -0500 Subject: [PATCH 110/137] Added missing lapacke.cmake file. --- cmake/lapacke.cmake | 2067 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2067 insertions(+) create mode 100644 cmake/lapacke.cmake diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake new file mode 100644 index 000000000..ce7f781dd --- /dev/null +++ b/cmake/lapacke.cmake @@ -0,0 +1,2067 @@ + +set(C_SRC + lapacke_cbbcsd.c + lapacke_cbbcsd_work.c + lapacke_cbdsqr.c + lapacke_cbdsqr_work.c + lapacke_cgbbrd.c + lapacke_cgbbrd_work.c + lapacke_cgbcon.c + lapacke_cgbcon_work.c + lapacke_cgbequ.c + lapacke_cgbequ_work.c + lapacke_cgbequb.c + lapacke_cgbequb_work.c + lapacke_cgbrfs.c + lapacke_cgbrfs_work.c + lapacke_cgbsv.c + lapacke_cgbsv_work.c + lapacke_cgbsvx.c + lapacke_cgbsvx_work.c + lapacke_cgbtrf.c + lapacke_cgbtrf_work.c + lapacke_cgbtrs.c + lapacke_cgbtrs_work.c + lapacke_cgebak.c + lapacke_cgebak_work.c + lapacke_cgebal.c + lapacke_cgebal_work.c + lapacke_cgebrd.c + lapacke_cgebrd_work.c + lapacke_cgecon.c + lapacke_cgecon_work.c + lapacke_cgeequ.c + lapacke_cgeequ_work.c + lapacke_cgeequb.c + lapacke_cgeequb_work.c + lapacke_cgees.c + 
lapacke_cgees_work.c + lapacke_cgeesx.c + lapacke_cgeesx_work.c + lapacke_cgeev.c + lapacke_cgeev_work.c + lapacke_cgeevx.c + lapacke_cgeevx_work.c + lapacke_cgehrd.c + lapacke_cgehrd_work.c + lapacke_cgelq2.c + lapacke_cgelq2_work.c + lapacke_cgelqf.c + lapacke_cgelqf_work.c + lapacke_cgels.c + lapacke_cgels_work.c + lapacke_cgelsd.c + lapacke_cgelsd_work.c + lapacke_cgelss.c + lapacke_cgelss_work.c + lapacke_cgelsy.c + lapacke_cgelsy_work.c + lapacke_cgemqrt.c + lapacke_cgemqrt_work.c + lapacke_cgeqlf.c + lapacke_cgeqlf_work.c + lapacke_cgeqp3.c + lapacke_cgeqp3_work.c + lapacke_cgeqpf.c + lapacke_cgeqpf_work.c + lapacke_cgeqr2.c + lapacke_cgeqr2_work.c + lapacke_cgeqrf.c + lapacke_cgeqrf_work.c + lapacke_cgeqrfp.c + lapacke_cgeqrfp_work.c + lapacke_cgeqrt.c + lapacke_cgeqrt2.c + lapacke_cgeqrt2_work.c + lapacke_cgeqrt3.c + lapacke_cgeqrt3_work.c + lapacke_cgeqrt_work.c + lapacke_cgerfs.c + lapacke_cgerfs_work.c + lapacke_cgerqf.c + lapacke_cgerqf_work.c + lapacke_cgesdd.c + lapacke_cgesdd_work.c + lapacke_cgesv.c + lapacke_cgesv_work.c + lapacke_cgesvd.c + lapacke_cgesvd_work.c + lapacke_cgesvx.c + lapacke_cgesvx_work.c + lapacke_cgetf2.c + lapacke_cgetf2_work.c + lapacke_cgetrf.c + lapacke_cgetrf_work.c + lapacke_cgetri.c + lapacke_cgetri_work.c + lapacke_cgetrs.c + lapacke_cgetrs_work.c + lapacke_cggbak.c + lapacke_cggbak_work.c + lapacke_cggbal.c + lapacke_cggbal_work.c + lapacke_cgges.c + lapacke_cgges_work.c + lapacke_cggesx.c + lapacke_cggesx_work.c + lapacke_cggev.c + lapacke_cggev_work.c + lapacke_cggevx.c + lapacke_cggevx_work.c + lapacke_cggglm.c + lapacke_cggglm_work.c + lapacke_cgghrd.c + lapacke_cgghrd_work.c + lapacke_cgglse.c + lapacke_cgglse_work.c + lapacke_cggqrf.c + lapacke_cggqrf_work.c + lapacke_cggrqf.c + lapacke_cggrqf_work.c + lapacke_cggsvd.c + lapacke_cggsvd_work.c + lapacke_cggsvp.c + lapacke_cggsvp_work.c + lapacke_cgtcon.c + lapacke_cgtcon_work.c + lapacke_cgtrfs.c + lapacke_cgtrfs_work.c + lapacke_cgtsv.c + lapacke_cgtsv_work.c + 
lapacke_cgtsvx.c + lapacke_cgtsvx_work.c + lapacke_cgttrf.c + lapacke_cgttrf_work.c + lapacke_cgttrs.c + lapacke_cgttrs_work.c + lapacke_chbev.c + lapacke_chbev_work.c + lapacke_chbevd.c + lapacke_chbevd_work.c + lapacke_chbevx.c + lapacke_chbevx_work.c + lapacke_chbgst.c + lapacke_chbgst_work.c + lapacke_chbgv.c + lapacke_chbgv_work.c + lapacke_chbgvd.c + lapacke_chbgvd_work.c + lapacke_chbgvx.c + lapacke_chbgvx_work.c + lapacke_chbtrd.c + lapacke_chbtrd_work.c + lapacke_checon.c + lapacke_checon_work.c + lapacke_cheequb.c + lapacke_cheequb_work.c + lapacke_cheev.c + lapacke_cheev_work.c + lapacke_cheevd.c + lapacke_cheevd_work.c + lapacke_cheevr.c + lapacke_cheevr_work.c + lapacke_cheevx.c + lapacke_cheevx_work.c + lapacke_chegst.c + lapacke_chegst_work.c + lapacke_chegv.c + lapacke_chegv_work.c + lapacke_chegvd.c + lapacke_chegvd_work.c + lapacke_chegvx.c + lapacke_chegvx_work.c + lapacke_cherfs.c + lapacke_cherfs_work.c + lapacke_chesv.c + lapacke_chesv_work.c + lapacke_chesvx.c + lapacke_chesvx_work.c + lapacke_cheswapr.c + lapacke_cheswapr_work.c + lapacke_chetrd.c + lapacke_chetrd_work.c + lapacke_chetrf.c + lapacke_chetrf_work.c + lapacke_chetri.c + lapacke_chetri2.c + lapacke_chetri2_work.c + lapacke_chetri2x.c + lapacke_chetri2x_work.c + lapacke_chetri_work.c + lapacke_chetrs.c + lapacke_chetrs2.c + lapacke_chetrs2_work.c + lapacke_chetrs_work.c + lapacke_chfrk.c + lapacke_chfrk_work.c + lapacke_chgeqz.c + lapacke_chgeqz_work.c + lapacke_chpcon.c + lapacke_chpcon_work.c + lapacke_chpev.c + lapacke_chpev_work.c + lapacke_chpevd.c + lapacke_chpevd_work.c + lapacke_chpevx.c + lapacke_chpevx_work.c + lapacke_chpgst.c + lapacke_chpgst_work.c + lapacke_chpgv.c + lapacke_chpgv_work.c + lapacke_chpgvd.c + lapacke_chpgvd_work.c + lapacke_chpgvx.c + lapacke_chpgvx_work.c + lapacke_chprfs.c + lapacke_chprfs_work.c + lapacke_chpsv.c + lapacke_chpsv_work.c + lapacke_chpsvx.c + lapacke_chpsvx_work.c + lapacke_chptrd.c + lapacke_chptrd_work.c + lapacke_chptrf.c + 
lapacke_chptrf_work.c + lapacke_chptri.c + lapacke_chptri_work.c + lapacke_chptrs.c + lapacke_chptrs_work.c + lapacke_chsein.c + lapacke_chsein_work.c + lapacke_chseqr.c + lapacke_chseqr_work.c + lapacke_clacgv.c + lapacke_clacgv_work.c + lapacke_clacn2.c + lapacke_clacn2_work.c + lapacke_clacp2.c + lapacke_clacp2_work.c + lapacke_clacpy.c + lapacke_clacpy_work.c + lapacke_clag2z.c + lapacke_clag2z_work.c + lapacke_clange.c + lapacke_clange_work.c + lapacke_clanhe.c + lapacke_clanhe_work.c + lapacke_clansy.c + lapacke_clansy_work.c + lapacke_clantr.c + lapacke_clantr_work.c + lapacke_clapmr.c + lapacke_clapmr_work.c + lapacke_clarfb.c + lapacke_clarfb_work.c + lapacke_clarfg.c + lapacke_clarfg_work.c + lapacke_clarft.c + lapacke_clarft_work.c + lapacke_clarfx.c + lapacke_clarfx_work.c + lapacke_clarnv.c + lapacke_clarnv_work.c + lapacke_claset.c + lapacke_claset_work.c + lapacke_claswp.c + lapacke_claswp_work.c + lapacke_clauum.c + lapacke_clauum_work.c + lapacke_cpbcon.c + lapacke_cpbcon_work.c + lapacke_cpbequ.c + lapacke_cpbequ_work.c + lapacke_cpbrfs.c + lapacke_cpbrfs_work.c + lapacke_cpbstf.c + lapacke_cpbstf_work.c + lapacke_cpbsv.c + lapacke_cpbsv_work.c + lapacke_cpbsvx.c + lapacke_cpbsvx_work.c + lapacke_cpbtrf.c + lapacke_cpbtrf_work.c + lapacke_cpbtrs.c + lapacke_cpbtrs_work.c + lapacke_cpftrf.c + lapacke_cpftrf_work.c + lapacke_cpftri.c + lapacke_cpftri_work.c + lapacke_cpftrs.c + lapacke_cpftrs_work.c + lapacke_cpocon.c + lapacke_cpocon_work.c + lapacke_cpoequ.c + lapacke_cpoequ_work.c + lapacke_cpoequb.c + lapacke_cpoequb_work.c + lapacke_cporfs.c + lapacke_cporfs_work.c + lapacke_cposv.c + lapacke_cposv_work.c + lapacke_cposvx.c + lapacke_cposvx_work.c + lapacke_cpotrf.c + lapacke_cpotrf_work.c + lapacke_cpotri.c + lapacke_cpotri_work.c + lapacke_cpotrs.c + lapacke_cpotrs_work.c + lapacke_cppcon.c + lapacke_cppcon_work.c + lapacke_cppequ.c + lapacke_cppequ_work.c + lapacke_cpprfs.c + lapacke_cpprfs_work.c + lapacke_cppsv.c + lapacke_cppsv_work.c + 
lapacke_cppsvx.c + lapacke_cppsvx_work.c + lapacke_cpptrf.c + lapacke_cpptrf_work.c + lapacke_cpptri.c + lapacke_cpptri_work.c + lapacke_cpptrs.c + lapacke_cpptrs_work.c + lapacke_cpstrf.c + lapacke_cpstrf_work.c + lapacke_cptcon.c + lapacke_cptcon_work.c + lapacke_cpteqr.c + lapacke_cpteqr_work.c + lapacke_cptrfs.c + lapacke_cptrfs_work.c + lapacke_cptsv.c + lapacke_cptsv_work.c + lapacke_cptsvx.c + lapacke_cptsvx_work.c + lapacke_cpttrf.c + lapacke_cpttrf_work.c + lapacke_cpttrs.c + lapacke_cpttrs_work.c + lapacke_cspcon.c + lapacke_cspcon_work.c + lapacke_csprfs.c + lapacke_csprfs_work.c + lapacke_cspsv.c + lapacke_cspsv_work.c + lapacke_cspsvx.c + lapacke_cspsvx_work.c + lapacke_csptrf.c + lapacke_csptrf_work.c + lapacke_csptri.c + lapacke_csptri_work.c + lapacke_csptrs.c + lapacke_csptrs_work.c + lapacke_cstedc.c + lapacke_cstedc_work.c + lapacke_cstegr.c + lapacke_cstegr_work.c + lapacke_cstein.c + lapacke_cstein_work.c + lapacke_cstemr.c + lapacke_cstemr_work.c + lapacke_csteqr.c + lapacke_csteqr_work.c + lapacke_csycon.c + lapacke_csycon_work.c + lapacke_csyconv.c + lapacke_csyconv_work.c + lapacke_csyequb.c + lapacke_csyequb_work.c + lapacke_csyrfs.c + lapacke_csyrfs_work.c + lapacke_csysv.c + lapacke_csysv_rook.c + lapacke_csysv_rook_work.c + lapacke_csysv_work.c + lapacke_csysvx.c + lapacke_csysvx_work.c + lapacke_csyswapr.c + lapacke_csyswapr_work.c + lapacke_csytrf.c + lapacke_csytrf_work.c + lapacke_csytri.c + lapacke_csytri2.c + lapacke_csytri2_work.c + lapacke_csytri2x.c + lapacke_csytri2x_work.c + lapacke_csytri_work.c + lapacke_csytrs.c + lapacke_csytrs2.c + lapacke_csytrs2_work.c + lapacke_csytrs_work.c + lapacke_ctbcon.c + lapacke_ctbcon_work.c + lapacke_ctbrfs.c + lapacke_ctbrfs_work.c + lapacke_ctbtrs.c + lapacke_ctbtrs_work.c + lapacke_ctfsm.c + lapacke_ctfsm_work.c + lapacke_ctftri.c + lapacke_ctftri_work.c + lapacke_ctfttp.c + lapacke_ctfttp_work.c + lapacke_ctfttr.c + lapacke_ctfttr_work.c + lapacke_ctgevc.c + lapacke_ctgevc_work.c + 
lapacke_ctgexc.c + lapacke_ctgexc_work.c + lapacke_ctgsen.c + lapacke_ctgsen_work.c + lapacke_ctgsja.c + lapacke_ctgsja_work.c + lapacke_ctgsna.c + lapacke_ctgsna_work.c + lapacke_ctgsyl.c + lapacke_ctgsyl_work.c + lapacke_ctpcon.c + lapacke_ctpcon_work.c + lapacke_ctpmqrt.c + lapacke_ctpmqrt_work.c + lapacke_ctpqrt.c + lapacke_ctpqrt2.c + lapacke_ctpqrt2_work.c + lapacke_ctpqrt_work.c + lapacke_ctprfb.c + lapacke_ctprfb_work.c + lapacke_ctprfs.c + lapacke_ctprfs_work.c + lapacke_ctptri.c + lapacke_ctptri_work.c + lapacke_ctptrs.c + lapacke_ctptrs_work.c + lapacke_ctpttf.c + lapacke_ctpttf_work.c + lapacke_ctpttr.c + lapacke_ctpttr_work.c + lapacke_ctrcon.c + lapacke_ctrcon_work.c + lapacke_ctrevc.c + lapacke_ctrevc_work.c + lapacke_ctrexc.c + lapacke_ctrexc_work.c + lapacke_ctrrfs.c + lapacke_ctrrfs_work.c + lapacke_ctrsen.c + lapacke_ctrsen_work.c + lapacke_ctrsna.c + lapacke_ctrsna_work.c + lapacke_ctrsyl.c + lapacke_ctrsyl_work.c + lapacke_ctrtri.c + lapacke_ctrtri_work.c + lapacke_ctrtrs.c + lapacke_ctrtrs_work.c + lapacke_ctrttf.c + lapacke_ctrttf_work.c + lapacke_ctrttp.c + lapacke_ctrttp_work.c + lapacke_ctzrzf.c + lapacke_ctzrzf_work.c + lapacke_cunbdb.c + lapacke_cunbdb_work.c + lapacke_cuncsd.c + lapacke_cuncsd_work.c + lapacke_cungbr.c + lapacke_cungbr_work.c + lapacke_cunghr.c + lapacke_cunghr_work.c + lapacke_cunglq.c + lapacke_cunglq_work.c + lapacke_cungql.c + lapacke_cungql_work.c + lapacke_cungqr.c + lapacke_cungqr_work.c + lapacke_cungrq.c + lapacke_cungrq_work.c + lapacke_cungtr.c + lapacke_cungtr_work.c + lapacke_cunmbr.c + lapacke_cunmbr_work.c + lapacke_cunmhr.c + lapacke_cunmhr_work.c + lapacke_cunmlq.c + lapacke_cunmlq_work.c + lapacke_cunmql.c + lapacke_cunmql_work.c + lapacke_cunmqr.c + lapacke_cunmqr_work.c + lapacke_cunmrq.c + lapacke_cunmrq_work.c + lapacke_cunmrz.c + lapacke_cunmrz_work.c + lapacke_cunmtr.c + lapacke_cunmtr_work.c + lapacke_cupgtr.c + lapacke_cupgtr_work.c + lapacke_cupmtr.c + lapacke_cupmtr_work.c +) + +set(DSRC + 
lapacke_dbbcsd.c + lapacke_dbbcsd_work.c + lapacke_dbdsdc.c + lapacke_dbdsdc_work.c + lapacke_dbdsqr.c + lapacke_dbdsqr_work.c + lapacke_ddisna.c + lapacke_ddisna_work.c + lapacke_dgbbrd.c + lapacke_dgbbrd_work.c + lapacke_dgbcon.c + lapacke_dgbcon_work.c + lapacke_dgbequ.c + lapacke_dgbequ_work.c + lapacke_dgbequb.c + lapacke_dgbequb_work.c + lapacke_dgbrfs.c + lapacke_dgbrfs_work.c + lapacke_dgbsv.c + lapacke_dgbsv_work.c + lapacke_dgbsvx.c + lapacke_dgbsvx_work.c + lapacke_dgbtrf.c + lapacke_dgbtrf_work.c + lapacke_dgbtrs.c + lapacke_dgbtrs_work.c + lapacke_dgebak.c + lapacke_dgebak_work.c + lapacke_dgebal.c + lapacke_dgebal_work.c + lapacke_dgebrd.c + lapacke_dgebrd_work.c + lapacke_dgecon.c + lapacke_dgecon_work.c + lapacke_dgeequ.c + lapacke_dgeequ_work.c + lapacke_dgeequb.c + lapacke_dgeequb_work.c + lapacke_dgees.c + lapacke_dgees_work.c + lapacke_dgeesx.c + lapacke_dgeesx_work.c + lapacke_dgeev.c + lapacke_dgeev_work.c + lapacke_dgeevx.c + lapacke_dgeevx_work.c + lapacke_dgehrd.c + lapacke_dgehrd_work.c + lapacke_dgejsv.c + lapacke_dgejsv_work.c + lapacke_dgelq2.c + lapacke_dgelq2_work.c + lapacke_dgelqf.c + lapacke_dgelqf_work.c + lapacke_dgels.c + lapacke_dgels_work.c + lapacke_dgelsd.c + lapacke_dgelsd_work.c + lapacke_dgelss.c + lapacke_dgelss_work.c + lapacke_dgelsy.c + lapacke_dgelsy_work.c + lapacke_dgemqrt.c + lapacke_dgemqrt_work.c + lapacke_dgeqlf.c + lapacke_dgeqlf_work.c + lapacke_dgeqp3.c + lapacke_dgeqp3_work.c + lapacke_dgeqpf.c + lapacke_dgeqpf_work.c + lapacke_dgeqr2.c + lapacke_dgeqr2_work.c + lapacke_dgeqrf.c + lapacke_dgeqrf_work.c + lapacke_dgeqrfp.c + lapacke_dgeqrfp_work.c + lapacke_dgeqrt.c + lapacke_dgeqrt2.c + lapacke_dgeqrt2_work.c + lapacke_dgeqrt3.c + lapacke_dgeqrt3_work.c + lapacke_dgeqrt_work.c + lapacke_dgerfs.c + lapacke_dgerfs_work.c + lapacke_dgerqf.c + lapacke_dgerqf_work.c + lapacke_dgesdd.c + lapacke_dgesdd_work.c + lapacke_dgesv.c + lapacke_dgesv_work.c + lapacke_dgesvd.c + lapacke_dgesvd_work.c + lapacke_dgesvj.c + 
lapacke_dgesvj_work.c + lapacke_dgesvx.c + lapacke_dgesvx_work.c + lapacke_dgetf2.c + lapacke_dgetf2_work.c + lapacke_dgetrf.c + lapacke_dgetrf_work.c + lapacke_dgetri.c + lapacke_dgetri_work.c + lapacke_dgetrs.c + lapacke_dgetrs_work.c + lapacke_dggbak.c + lapacke_dggbak_work.c + lapacke_dggbal.c + lapacke_dggbal_work.c + lapacke_dgges.c + lapacke_dgges_work.c + lapacke_dggesx.c + lapacke_dggesx_work.c + lapacke_dggev.c + lapacke_dggev_work.c + lapacke_dggevx.c + lapacke_dggevx_work.c + lapacke_dggglm.c + lapacke_dggglm_work.c + lapacke_dgghrd.c + lapacke_dgghrd_work.c + lapacke_dgglse.c + lapacke_dgglse_work.c + lapacke_dggqrf.c + lapacke_dggqrf_work.c + lapacke_dggrqf.c + lapacke_dggrqf_work.c + lapacke_dggsvd.c + lapacke_dggsvd_work.c + lapacke_dggsvp.c + lapacke_dggsvp_work.c + lapacke_dgtcon.c + lapacke_dgtcon_work.c + lapacke_dgtrfs.c + lapacke_dgtrfs_work.c + lapacke_dgtsv.c + lapacke_dgtsv_work.c + lapacke_dgtsvx.c + lapacke_dgtsvx_work.c + lapacke_dgttrf.c + lapacke_dgttrf_work.c + lapacke_dgttrs.c + lapacke_dgttrs_work.c + lapacke_dhgeqz.c + lapacke_dhgeqz_work.c + lapacke_dhsein.c + lapacke_dhsein_work.c + lapacke_dhseqr.c + lapacke_dhseqr_work.c + lapacke_dlacn2.c + lapacke_dlacn2_work.c + lapacke_dlacpy.c + lapacke_dlacpy_work.c + lapacke_dlag2s.c + lapacke_dlag2s_work.c + lapacke_dlamch.c + lapacke_dlamch_work.c + lapacke_dlange.c + lapacke_dlange_work.c + lapacke_dlansy.c + lapacke_dlansy_work.c + lapacke_dlantr.c + lapacke_dlantr_work.c + lapacke_dlapmr.c + lapacke_dlapmr_work.c + lapacke_dlapy2.c + lapacke_dlapy2_work.c + lapacke_dlapy3.c + lapacke_dlapy3_work.c + lapacke_dlarfb.c + lapacke_dlarfb_work.c + lapacke_dlarfg.c + lapacke_dlarfg_work.c + lapacke_dlarft.c + lapacke_dlarft_work.c + lapacke_dlarfx.c + lapacke_dlarfx_work.c + lapacke_dlarnv.c + lapacke_dlarnv_work.c + lapacke_dlartgp.c + lapacke_dlartgp_work.c + lapacke_dlartgs.c + lapacke_dlartgs_work.c + lapacke_dlaset.c + lapacke_dlaset_work.c + lapacke_dlasrt.c + lapacke_dlasrt_work.c + 
lapacke_dlaswp.c + lapacke_dlaswp_work.c + lapacke_dlauum.c + lapacke_dlauum_work.c + lapacke_dopgtr.c + lapacke_dopgtr_work.c + lapacke_dopmtr.c + lapacke_dopmtr_work.c + lapacke_dorbdb.c + lapacke_dorbdb_work.c + lapacke_dorcsd.c + lapacke_dorcsd_work.c + lapacke_dorgbr.c + lapacke_dorgbr_work.c + lapacke_dorghr.c + lapacke_dorghr_work.c + lapacke_dorglq.c + lapacke_dorglq_work.c + lapacke_dorgql.c + lapacke_dorgql_work.c + lapacke_dorgqr.c + lapacke_dorgqr_work.c + lapacke_dorgrq.c + lapacke_dorgrq_work.c + lapacke_dorgtr.c + lapacke_dorgtr_work.c + lapacke_dormbr.c + lapacke_dormbr_work.c + lapacke_dormhr.c + lapacke_dormhr_work.c + lapacke_dormlq.c + lapacke_dormlq_work.c + lapacke_dormql.c + lapacke_dormql_work.c + lapacke_dormqr.c + lapacke_dormqr_work.c + lapacke_dormrq.c + lapacke_dormrq_work.c + lapacke_dormrz.c + lapacke_dormrz_work.c + lapacke_dormtr.c + lapacke_dormtr_work.c + lapacke_dpbcon.c + lapacke_dpbcon_work.c + lapacke_dpbequ.c + lapacke_dpbequ_work.c + lapacke_dpbrfs.c + lapacke_dpbrfs_work.c + lapacke_dpbstf.c + lapacke_dpbstf_work.c + lapacke_dpbsv.c + lapacke_dpbsv_work.c + lapacke_dpbsvx.c + lapacke_dpbsvx_work.c + lapacke_dpbtrf.c + lapacke_dpbtrf_work.c + lapacke_dpbtrs.c + lapacke_dpbtrs_work.c + lapacke_dpftrf.c + lapacke_dpftrf_work.c + lapacke_dpftri.c + lapacke_dpftri_work.c + lapacke_dpftrs.c + lapacke_dpftrs_work.c + lapacke_dpocon.c + lapacke_dpocon_work.c + lapacke_dpoequ.c + lapacke_dpoequ_work.c + lapacke_dpoequb.c + lapacke_dpoequb_work.c + lapacke_dporfs.c + lapacke_dporfs_work.c + lapacke_dposv.c + lapacke_dposv_work.c + lapacke_dposvx.c + lapacke_dposvx_work.c + lapacke_dpotrf.c + lapacke_dpotrf_work.c + lapacke_dpotri.c + lapacke_dpotri_work.c + lapacke_dpotrs.c + lapacke_dpotrs_work.c + lapacke_dppcon.c + lapacke_dppcon_work.c + lapacke_dppequ.c + lapacke_dppequ_work.c + lapacke_dpprfs.c + lapacke_dpprfs_work.c + lapacke_dppsv.c + lapacke_dppsv_work.c + lapacke_dppsvx.c + lapacke_dppsvx_work.c + lapacke_dpptrf.c + 
lapacke_dpptrf_work.c + lapacke_dpptri.c + lapacke_dpptri_work.c + lapacke_dpptrs.c + lapacke_dpptrs_work.c + lapacke_dpstrf.c + lapacke_dpstrf_work.c + lapacke_dptcon.c + lapacke_dptcon_work.c + lapacke_dpteqr.c + lapacke_dpteqr_work.c + lapacke_dptrfs.c + lapacke_dptrfs_work.c + lapacke_dptsv.c + lapacke_dptsv_work.c + lapacke_dptsvx.c + lapacke_dptsvx_work.c + lapacke_dpttrf.c + lapacke_dpttrf_work.c + lapacke_dpttrs.c + lapacke_dpttrs_work.c + lapacke_dsbev.c + lapacke_dsbev_work.c + lapacke_dsbevd.c + lapacke_dsbevd_work.c + lapacke_dsbevx.c + lapacke_dsbevx_work.c + lapacke_dsbgst.c + lapacke_dsbgst_work.c + lapacke_dsbgv.c + lapacke_dsbgv_work.c + lapacke_dsbgvd.c + lapacke_dsbgvd_work.c + lapacke_dsbgvx.c + lapacke_dsbgvx_work.c + lapacke_dsbtrd.c + lapacke_dsbtrd_work.c + lapacke_dsfrk.c + lapacke_dsfrk_work.c + lapacke_dsgesv.c + lapacke_dsgesv_work.c + lapacke_dspcon.c + lapacke_dspcon_work.c + lapacke_dspev.c + lapacke_dspev_work.c + lapacke_dspevd.c + lapacke_dspevd_work.c + lapacke_dspevx.c + lapacke_dspevx_work.c + lapacke_dspgst.c + lapacke_dspgst_work.c + lapacke_dspgv.c + lapacke_dspgv_work.c + lapacke_dspgvd.c + lapacke_dspgvd_work.c + lapacke_dspgvx.c + lapacke_dspgvx_work.c + lapacke_dsposv.c + lapacke_dsposv_work.c + lapacke_dsprfs.c + lapacke_dsprfs_work.c + lapacke_dspsv.c + lapacke_dspsv_work.c + lapacke_dspsvx.c + lapacke_dspsvx_work.c + lapacke_dsptrd.c + lapacke_dsptrd_work.c + lapacke_dsptrf.c + lapacke_dsptrf_work.c + lapacke_dsptri.c + lapacke_dsptri_work.c + lapacke_dsptrs.c + lapacke_dsptrs_work.c + lapacke_dstebz.c + lapacke_dstebz_work.c + lapacke_dstedc.c + lapacke_dstedc_work.c + lapacke_dstegr.c + lapacke_dstegr_work.c + lapacke_dstein.c + lapacke_dstein_work.c + lapacke_dstemr.c + lapacke_dstemr_work.c + lapacke_dsteqr.c + lapacke_dsteqr_work.c + lapacke_dsterf.c + lapacke_dsterf_work.c + lapacke_dstev.c + lapacke_dstev_work.c + lapacke_dstevd.c + lapacke_dstevd_work.c + lapacke_dstevr.c + lapacke_dstevr_work.c + 
lapacke_dstevx.c + lapacke_dstevx_work.c + lapacke_dsycon.c + lapacke_dsycon_work.c + lapacke_dsyconv.c + lapacke_dsyconv_work.c + lapacke_dsyequb.c + lapacke_dsyequb_work.c + lapacke_dsyev.c + lapacke_dsyev_work.c + lapacke_dsyevd.c + lapacke_dsyevd_work.c + lapacke_dsyevr.c + lapacke_dsyevr_work.c + lapacke_dsyevx.c + lapacke_dsyevx_work.c + lapacke_dsygst.c + lapacke_dsygst_work.c + lapacke_dsygv.c + lapacke_dsygv_work.c + lapacke_dsygvd.c + lapacke_dsygvd_work.c + lapacke_dsygvx.c + lapacke_dsygvx_work.c + lapacke_dsyrfs.c + lapacke_dsyrfs_work.c + lapacke_dsysv.c + lapacke_dsysv_rook.c + lapacke_dsysv_rook_work.c + lapacke_dsysv_work.c + lapacke_dsysvx.c + lapacke_dsysvx_work.c + lapacke_dsyswapr.c + lapacke_dsyswapr_work.c + lapacke_dsytrd.c + lapacke_dsytrd_work.c + lapacke_dsytrf.c + lapacke_dsytrf_work.c + lapacke_dsytri.c + lapacke_dsytri2.c + lapacke_dsytri2_work.c + lapacke_dsytri2x.c + lapacke_dsytri2x_work.c + lapacke_dsytri_work.c + lapacke_dsytrs.c + lapacke_dsytrs2.c + lapacke_dsytrs2_work.c + lapacke_dsytrs_work.c + lapacke_dtbcon.c + lapacke_dtbcon_work.c + lapacke_dtbrfs.c + lapacke_dtbrfs_work.c + lapacke_dtbtrs.c + lapacke_dtbtrs_work.c + lapacke_dtfsm.c + lapacke_dtfsm_work.c + lapacke_dtftri.c + lapacke_dtftri_work.c + lapacke_dtfttp.c + lapacke_dtfttp_work.c + lapacke_dtfttr.c + lapacke_dtfttr_work.c + lapacke_dtgevc.c + lapacke_dtgevc_work.c + lapacke_dtgexc.c + lapacke_dtgexc_work.c + lapacke_dtgsen.c + lapacke_dtgsen_work.c + lapacke_dtgsja.c + lapacke_dtgsja_work.c + lapacke_dtgsna.c + lapacke_dtgsna_work.c + lapacke_dtgsyl.c + lapacke_dtgsyl_work.c + lapacke_dtpcon.c + lapacke_dtpcon_work.c + lapacke_dtpmqrt.c + lapacke_dtpmqrt_work.c + lapacke_dtpqrt.c + lapacke_dtpqrt2.c + lapacke_dtpqrt2_work.c + lapacke_dtpqrt_work.c + lapacke_dtprfb.c + lapacke_dtprfb_work.c + lapacke_dtprfs.c + lapacke_dtprfs_work.c + lapacke_dtptri.c + lapacke_dtptri_work.c + lapacke_dtptrs.c + lapacke_dtptrs_work.c + lapacke_dtpttf.c + lapacke_dtpttf_work.c + 
lapacke_dtpttr.c + lapacke_dtpttr_work.c + lapacke_dtrcon.c + lapacke_dtrcon_work.c + lapacke_dtrevc.c + lapacke_dtrevc_work.c + lapacke_dtrexc.c + lapacke_dtrexc_work.c + lapacke_dtrrfs.c + lapacke_dtrrfs_work.c + lapacke_dtrsen.c + lapacke_dtrsen_work.c + lapacke_dtrsna.c + lapacke_dtrsna_work.c + lapacke_dtrsyl.c + lapacke_dtrsyl_work.c + lapacke_dtrtri.c + lapacke_dtrtri_work.c + lapacke_dtrtrs.c + lapacke_dtrtrs_work.c + lapacke_dtrttf.c + lapacke_dtrttf_work.c + lapacke_dtrttp.c + lapacke_dtrttp_work.c + lapacke_dtzrzf.c + lapacke_dtzrzf_work.c +) + +set(SSRC + lapacke_sbbcsd.c + lapacke_sbbcsd_work.c + lapacke_sbdsdc.c + lapacke_sbdsdc_work.c + lapacke_sbdsqr.c + lapacke_sbdsqr_work.c + lapacke_sdisna.c + lapacke_sdisna_work.c + lapacke_sgbbrd.c + lapacke_sgbbrd_work.c + lapacke_sgbcon.c + lapacke_sgbcon_work.c + lapacke_sgbequ.c + lapacke_sgbequ_work.c + lapacke_sgbequb.c + lapacke_sgbequb_work.c + lapacke_sgbrfs.c + lapacke_sgbrfs_work.c + lapacke_sgbsv.c + lapacke_sgbsv_work.c + lapacke_sgbsvx.c + lapacke_sgbsvx_work.c + lapacke_sgbtrf.c + lapacke_sgbtrf_work.c + lapacke_sgbtrs.c + lapacke_sgbtrs_work.c + lapacke_sgebak.c + lapacke_sgebak_work.c + lapacke_sgebal.c + lapacke_sgebal_work.c + lapacke_sgebrd.c + lapacke_sgebrd_work.c + lapacke_sgecon.c + lapacke_sgecon_work.c + lapacke_sgeequ.c + lapacke_sgeequ_work.c + lapacke_sgeequb.c + lapacke_sgeequb_work.c + lapacke_sgees.c + lapacke_sgees_work.c + lapacke_sgeesx.c + lapacke_sgeesx_work.c + lapacke_sgeev.c + lapacke_sgeev_work.c + lapacke_sgeevx.c + lapacke_sgeevx_work.c + lapacke_sgehrd.c + lapacke_sgehrd_work.c + lapacke_sgejsv.c + lapacke_sgejsv_work.c + lapacke_sgelq2.c + lapacke_sgelq2_work.c + lapacke_sgelqf.c + lapacke_sgelqf_work.c + lapacke_sgels.c + lapacke_sgels_work.c + lapacke_sgelsd.c + lapacke_sgelsd_work.c + lapacke_sgelss.c + lapacke_sgelss_work.c + lapacke_sgelsy.c + lapacke_sgelsy_work.c + lapacke_sgemqrt.c + lapacke_sgemqrt_work.c + lapacke_sgeqlf.c + lapacke_sgeqlf_work.c + 
lapacke_sgeqp3.c + lapacke_sgeqp3_work.c + lapacke_sgeqpf.c + lapacke_sgeqpf_work.c + lapacke_sgeqr2.c + lapacke_sgeqr2_work.c + lapacke_sgeqrf.c + lapacke_sgeqrf_work.c + lapacke_sgeqrfp.c + lapacke_sgeqrfp_work.c + lapacke_sgeqrt.c + lapacke_sgeqrt2.c + lapacke_sgeqrt2_work.c + lapacke_sgeqrt3.c + lapacke_sgeqrt3_work.c + lapacke_sgeqrt_work.c + lapacke_sgerfs.c + lapacke_sgerfs_work.c + lapacke_sgerqf.c + lapacke_sgerqf_work.c + lapacke_sgesdd.c + lapacke_sgesdd_work.c + lapacke_sgesv.c + lapacke_sgesv_work.c + lapacke_sgesvd.c + lapacke_sgesvd_work.c + lapacke_sgesvj.c + lapacke_sgesvj_work.c + lapacke_sgesvx.c + lapacke_sgesvx_work.c + lapacke_sgetf2.c + lapacke_sgetf2_work.c + lapacke_sgetrf.c + lapacke_sgetrf_work.c + lapacke_sgetri.c + lapacke_sgetri_work.c + lapacke_sgetrs.c + lapacke_sgetrs_work.c + lapacke_sggbak.c + lapacke_sggbak_work.c + lapacke_sggbal.c + lapacke_sggbal_work.c + lapacke_sgges.c + lapacke_sgges_work.c + lapacke_sggesx.c + lapacke_sggesx_work.c + lapacke_sggev.c + lapacke_sggev_work.c + lapacke_sggevx.c + lapacke_sggevx_work.c + lapacke_sggglm.c + lapacke_sggglm_work.c + lapacke_sgghrd.c + lapacke_sgghrd_work.c + lapacke_sgglse.c + lapacke_sgglse_work.c + lapacke_sggqrf.c + lapacke_sggqrf_work.c + lapacke_sggrqf.c + lapacke_sggrqf_work.c + lapacke_sggsvd.c + lapacke_sggsvd_work.c + lapacke_sggsvp.c + lapacke_sggsvp_work.c + lapacke_sgtcon.c + lapacke_sgtcon_work.c + lapacke_sgtrfs.c + lapacke_sgtrfs_work.c + lapacke_sgtsv.c + lapacke_sgtsv_work.c + lapacke_sgtsvx.c + lapacke_sgtsvx_work.c + lapacke_sgttrf.c + lapacke_sgttrf_work.c + lapacke_sgttrs.c + lapacke_sgttrs_work.c + lapacke_shgeqz.c + lapacke_shgeqz_work.c + lapacke_shsein.c + lapacke_shsein_work.c + lapacke_shseqr.c + lapacke_shseqr_work.c + lapacke_slacn2.c + lapacke_slacn2_work.c + lapacke_slacpy.c + lapacke_slacpy_work.c + lapacke_slag2d.c + lapacke_slag2d_work.c + lapacke_slamch.c + lapacke_slamch_work.c + lapacke_slange.c + lapacke_slange_work.c + lapacke_slansy.c + 
lapacke_slansy_work.c + lapacke_slantr.c + lapacke_slantr_work.c + lapacke_slapmr.c + lapacke_slapmr_work.c + lapacke_slapy2.c + lapacke_slapy2_work.c + lapacke_slapy3.c + lapacke_slapy3_work.c + lapacke_slarfb.c + lapacke_slarfb_work.c + lapacke_slarfg.c + lapacke_slarfg_work.c + lapacke_slarft.c + lapacke_slarft_work.c + lapacke_slarfx.c + lapacke_slarfx_work.c + lapacke_slarnv.c + lapacke_slarnv_work.c + lapacke_slartgp.c + lapacke_slartgp_work.c + lapacke_slartgs.c + lapacke_slartgs_work.c + lapacke_slaset.c + lapacke_slaset_work.c + lapacke_slasrt.c + lapacke_slasrt_work.c + lapacke_slaswp.c + lapacke_slaswp_work.c + lapacke_slauum.c + lapacke_slauum_work.c + lapacke_sopgtr.c + lapacke_sopgtr_work.c + lapacke_sopmtr.c + lapacke_sopmtr_work.c + lapacke_sorbdb.c + lapacke_sorbdb_work.c + lapacke_sorcsd.c + lapacke_sorcsd_work.c + lapacke_sorgbr.c + lapacke_sorgbr_work.c + lapacke_sorghr.c + lapacke_sorghr_work.c + lapacke_sorglq.c + lapacke_sorglq_work.c + lapacke_sorgql.c + lapacke_sorgql_work.c + lapacke_sorgqr.c + lapacke_sorgqr_work.c + lapacke_sorgrq.c + lapacke_sorgrq_work.c + lapacke_sorgtr.c + lapacke_sorgtr_work.c + lapacke_sormbr.c + lapacke_sormbr_work.c + lapacke_sormhr.c + lapacke_sormhr_work.c + lapacke_sormlq.c + lapacke_sormlq_work.c + lapacke_sormql.c + lapacke_sormql_work.c + lapacke_sormqr.c + lapacke_sormqr_work.c + lapacke_sormrq.c + lapacke_sormrq_work.c + lapacke_sormrz.c + lapacke_sormrz_work.c + lapacke_sormtr.c + lapacke_sormtr_work.c + lapacke_spbcon.c + lapacke_spbcon_work.c + lapacke_spbequ.c + lapacke_spbequ_work.c + lapacke_spbrfs.c + lapacke_spbrfs_work.c + lapacke_spbstf.c + lapacke_spbstf_work.c + lapacke_spbsv.c + lapacke_spbsv_work.c + lapacke_spbsvx.c + lapacke_spbsvx_work.c + lapacke_spbtrf.c + lapacke_spbtrf_work.c + lapacke_spbtrs.c + lapacke_spbtrs_work.c + lapacke_spftrf.c + lapacke_spftrf_work.c + lapacke_spftri.c + lapacke_spftri_work.c + lapacke_spftrs.c + lapacke_spftrs_work.c + lapacke_spocon.c + 
lapacke_spocon_work.c + lapacke_spoequ.c + lapacke_spoequ_work.c + lapacke_spoequb.c + lapacke_spoequb_work.c + lapacke_sporfs.c + lapacke_sporfs_work.c + lapacke_sposv.c + lapacke_sposv_work.c + lapacke_sposvx.c + lapacke_sposvx_work.c + lapacke_spotrf.c + lapacke_spotrf_work.c + lapacke_spotri.c + lapacke_spotri_work.c + lapacke_spotrs.c + lapacke_spotrs_work.c + lapacke_sppcon.c + lapacke_sppcon_work.c + lapacke_sppequ.c + lapacke_sppequ_work.c + lapacke_spprfs.c + lapacke_spprfs_work.c + lapacke_sppsv.c + lapacke_sppsv_work.c + lapacke_sppsvx.c + lapacke_sppsvx_work.c + lapacke_spptrf.c + lapacke_spptrf_work.c + lapacke_spptri.c + lapacke_spptri_work.c + lapacke_spptrs.c + lapacke_spptrs_work.c + lapacke_spstrf.c + lapacke_spstrf_work.c + lapacke_sptcon.c + lapacke_sptcon_work.c + lapacke_spteqr.c + lapacke_spteqr_work.c + lapacke_sptrfs.c + lapacke_sptrfs_work.c + lapacke_sptsv.c + lapacke_sptsv_work.c + lapacke_sptsvx.c + lapacke_sptsvx_work.c + lapacke_spttrf.c + lapacke_spttrf_work.c + lapacke_spttrs.c + lapacke_spttrs_work.c + lapacke_ssbev.c + lapacke_ssbev_work.c + lapacke_ssbevd.c + lapacke_ssbevd_work.c + lapacke_ssbevx.c + lapacke_ssbevx_work.c + lapacke_ssbgst.c + lapacke_ssbgst_work.c + lapacke_ssbgv.c + lapacke_ssbgv_work.c + lapacke_ssbgvd.c + lapacke_ssbgvd_work.c + lapacke_ssbgvx.c + lapacke_ssbgvx_work.c + lapacke_ssbtrd.c + lapacke_ssbtrd_work.c + lapacke_ssfrk.c + lapacke_ssfrk_work.c + lapacke_sspcon.c + lapacke_sspcon_work.c + lapacke_sspev.c + lapacke_sspev_work.c + lapacke_sspevd.c + lapacke_sspevd_work.c + lapacke_sspevx.c + lapacke_sspevx_work.c + lapacke_sspgst.c + lapacke_sspgst_work.c + lapacke_sspgv.c + lapacke_sspgv_work.c + lapacke_sspgvd.c + lapacke_sspgvd_work.c + lapacke_sspgvx.c + lapacke_sspgvx_work.c + lapacke_ssprfs.c + lapacke_ssprfs_work.c + lapacke_sspsv.c + lapacke_sspsv_work.c + lapacke_sspsvx.c + lapacke_sspsvx_work.c + lapacke_ssptrd.c + lapacke_ssptrd_work.c + lapacke_ssptrf.c + lapacke_ssptrf_work.c + 
lapacke_ssptri.c + lapacke_ssptri_work.c + lapacke_ssptrs.c + lapacke_ssptrs_work.c + lapacke_sstebz.c + lapacke_sstebz_work.c + lapacke_sstedc.c + lapacke_sstedc_work.c + lapacke_sstegr.c + lapacke_sstegr_work.c + lapacke_sstein.c + lapacke_sstein_work.c + lapacke_sstemr.c + lapacke_sstemr_work.c + lapacke_ssteqr.c + lapacke_ssteqr_work.c + lapacke_ssterf.c + lapacke_ssterf_work.c + lapacke_sstev.c + lapacke_sstev_work.c + lapacke_sstevd.c + lapacke_sstevd_work.c + lapacke_sstevr.c + lapacke_sstevr_work.c + lapacke_sstevx.c + lapacke_sstevx_work.c + lapacke_ssycon.c + lapacke_ssycon_work.c + lapacke_ssyconv.c + lapacke_ssyconv_work.c + lapacke_ssyequb.c + lapacke_ssyequb_work.c + lapacke_ssyev.c + lapacke_ssyev_work.c + lapacke_ssyevd.c + lapacke_ssyevd_work.c + lapacke_ssyevr.c + lapacke_ssyevr_work.c + lapacke_ssyevx.c + lapacke_ssyevx_work.c + lapacke_ssygst.c + lapacke_ssygst_work.c + lapacke_ssygv.c + lapacke_ssygv_work.c + lapacke_ssygvd.c + lapacke_ssygvd_work.c + lapacke_ssygvx.c + lapacke_ssygvx_work.c + lapacke_ssyrfs.c + lapacke_ssyrfs_work.c + lapacke_ssysv.c + lapacke_ssysv_rook.c + lapacke_ssysv_rook_work.c + lapacke_ssysv_work.c + lapacke_ssysvx.c + lapacke_ssysvx_work.c + lapacke_ssyswapr.c + lapacke_ssyswapr_work.c + lapacke_ssytrd.c + lapacke_ssytrd_work.c + lapacke_ssytrf.c + lapacke_ssytrf_work.c + lapacke_ssytri.c + lapacke_ssytri2.c + lapacke_ssytri2_work.c + lapacke_ssytri2x.c + lapacke_ssytri2x_work.c + lapacke_ssytri_work.c + lapacke_ssytrs.c + lapacke_ssytrs2.c + lapacke_ssytrs2_work.c + lapacke_ssytrs_work.c + lapacke_stbcon.c + lapacke_stbcon_work.c + lapacke_stbrfs.c + lapacke_stbrfs_work.c + lapacke_stbtrs.c + lapacke_stbtrs_work.c + lapacke_stfsm.c + lapacke_stfsm_work.c + lapacke_stftri.c + lapacke_stftri_work.c + lapacke_stfttp.c + lapacke_stfttp_work.c + lapacke_stfttr.c + lapacke_stfttr_work.c + lapacke_stgevc.c + lapacke_stgevc_work.c + lapacke_stgexc.c + lapacke_stgexc_work.c + lapacke_stgsen.c + lapacke_stgsen_work.c + 
lapacke_stgsja.c + lapacke_stgsja_work.c + lapacke_stgsna.c + lapacke_stgsna_work.c + lapacke_stgsyl.c + lapacke_stgsyl_work.c + lapacke_stpcon.c + lapacke_stpcon_work.c + lapacke_stpmqrt.c + lapacke_stpmqrt_work.c + lapacke_stpqrt2.c + lapacke_stpqrt2_work.c + lapacke_stprfb.c + lapacke_stprfb_work.c + lapacke_stprfs.c + lapacke_stprfs_work.c + lapacke_stptri.c + lapacke_stptri_work.c + lapacke_stptrs.c + lapacke_stptrs_work.c + lapacke_stpttf.c + lapacke_stpttf_work.c + lapacke_stpttr.c + lapacke_stpttr_work.c + lapacke_strcon.c + lapacke_strcon_work.c + lapacke_strevc.c + lapacke_strevc_work.c + lapacke_strexc.c + lapacke_strexc_work.c + lapacke_strrfs.c + lapacke_strrfs_work.c + lapacke_strsen.c + lapacke_strsen_work.c + lapacke_strsna.c + lapacke_strsna_work.c + lapacke_strsyl.c + lapacke_strsyl_work.c + lapacke_strtri.c + lapacke_strtri_work.c + lapacke_strtrs.c + lapacke_strtrs_work.c + lapacke_strttf.c + lapacke_strttf_work.c + lapacke_strttp.c + lapacke_strttp_work.c + lapacke_stzrzf.c + lapacke_stzrzf_work.c +) + +set(ZSRC + lapacke_zbbcsd.c + lapacke_zbbcsd_work.c + lapacke_zbdsqr.c + lapacke_zbdsqr_work.c + lapacke_zcgesv.c + lapacke_zcgesv_work.c + lapacke_zcposv.c + lapacke_zcposv_work.c + lapacke_zgbbrd.c + lapacke_zgbbrd_work.c + lapacke_zgbcon.c + lapacke_zgbcon_work.c + lapacke_zgbequ.c + lapacke_zgbequ_work.c + lapacke_zgbequb.c + lapacke_zgbequb_work.c + lapacke_zgbrfs.c + lapacke_zgbrfs_work.c + lapacke_zgbsv.c + lapacke_zgbsv_work.c + lapacke_zgbsvx.c + lapacke_zgbsvx_work.c + lapacke_zgbtrf.c + lapacke_zgbtrf_work.c + lapacke_zgbtrs.c + lapacke_zgbtrs_work.c + lapacke_zgebak.c + lapacke_zgebak_work.c + lapacke_zgebal.c + lapacke_zgebal_work.c + lapacke_zgebrd.c + lapacke_zgebrd_work.c + lapacke_zgecon.c + lapacke_zgecon_work.c + lapacke_zgeequ.c + lapacke_zgeequ_work.c + lapacke_zgeequb.c + lapacke_zgeequb_work.c + lapacke_zgees.c + lapacke_zgees_work.c + lapacke_zgeesx.c + lapacke_zgeesx_work.c + lapacke_zgeev.c + lapacke_zgeev_work.c + 
lapacke_zgeevx.c + lapacke_zgeevx_work.c + lapacke_zgehrd.c + lapacke_zgehrd_work.c + lapacke_zgelq2.c + lapacke_zgelq2_work.c + lapacke_zgelqf.c + lapacke_zgelqf_work.c + lapacke_zgels.c + lapacke_zgels_work.c + lapacke_zgelsd.c + lapacke_zgelsd_work.c + lapacke_zgelss.c + lapacke_zgelss_work.c + lapacke_zgelsy.c + lapacke_zgelsy_work.c + lapacke_zgemqrt.c + lapacke_zgemqrt_work.c + lapacke_zgeqlf.c + lapacke_zgeqlf_work.c + lapacke_zgeqp3.c + lapacke_zgeqp3_work.c + lapacke_zgeqpf.c + lapacke_zgeqpf_work.c + lapacke_zgeqr2.c + lapacke_zgeqr2_work.c + lapacke_zgeqrf.c + lapacke_zgeqrf_work.c + lapacke_zgeqrfp.c + lapacke_zgeqrfp_work.c + lapacke_zgeqrt.c + lapacke_zgeqrt2.c + lapacke_zgeqrt2_work.c + lapacke_zgeqrt3.c + lapacke_zgeqrt3_work.c + lapacke_zgeqrt_work.c + lapacke_zgerfs.c + lapacke_zgerfs_work.c + lapacke_zgerqf.c + lapacke_zgerqf_work.c + lapacke_zgesdd.c + lapacke_zgesdd_work.c + lapacke_zgesv.c + lapacke_zgesv_work.c + lapacke_zgesvd.c + lapacke_zgesvd_work.c + lapacke_zgesvx.c + lapacke_zgesvx_work.c + lapacke_zgetf2.c + lapacke_zgetf2_work.c + lapacke_zgetrf.c + lapacke_zgetrf_work.c + lapacke_zgetri.c + lapacke_zgetri_work.c + lapacke_zgetrs.c + lapacke_zgetrs_work.c + lapacke_zggbak.c + lapacke_zggbak_work.c + lapacke_zggbal.c + lapacke_zggbal_work.c + lapacke_zgges.c + lapacke_zgges_work.c + lapacke_zggesx.c + lapacke_zggesx_work.c + lapacke_zggev.c + lapacke_zggev_work.c + lapacke_zggevx.c + lapacke_zggevx_work.c + lapacke_zggglm.c + lapacke_zggglm_work.c + lapacke_zgghrd.c + lapacke_zgghrd_work.c + lapacke_zgglse.c + lapacke_zgglse_work.c + lapacke_zggqrf.c + lapacke_zggqrf_work.c + lapacke_zggrqf.c + lapacke_zggrqf_work.c + lapacke_zggsvd.c + lapacke_zggsvd_work.c + lapacke_zggsvp.c + lapacke_zggsvp_work.c + lapacke_zgtcon.c + lapacke_zgtcon_work.c + lapacke_zgtrfs.c + lapacke_zgtrfs_work.c + lapacke_zgtsv.c + lapacke_zgtsv_work.c + lapacke_zgtsvx.c + lapacke_zgtsvx_work.c + lapacke_zgttrf.c + lapacke_zgttrf_work.c + lapacke_zgttrs.c + 
lapacke_zgttrs_work.c + lapacke_zhbev.c + lapacke_zhbev_work.c + lapacke_zhbevd.c + lapacke_zhbevd_work.c + lapacke_zhbevx.c + lapacke_zhbevx_work.c + lapacke_zhbgst.c + lapacke_zhbgst_work.c + lapacke_zhbgv.c + lapacke_zhbgv_work.c + lapacke_zhbgvd.c + lapacke_zhbgvd_work.c + lapacke_zhbgvx.c + lapacke_zhbgvx_work.c + lapacke_zhbtrd.c + lapacke_zhbtrd_work.c + lapacke_zhecon.c + lapacke_zhecon_work.c + lapacke_zheequb.c + lapacke_zheequb_work.c + lapacke_zheev.c + lapacke_zheev_work.c + lapacke_zheevd.c + lapacke_zheevd_work.c + lapacke_zheevr.c + lapacke_zheevr_work.c + lapacke_zheevx.c + lapacke_zheevx_work.c + lapacke_zhegst.c + lapacke_zhegst_work.c + lapacke_zhegv.c + lapacke_zhegv_work.c + lapacke_zhegvd.c + lapacke_zhegvd_work.c + lapacke_zhegvx.c + lapacke_zhegvx_work.c + lapacke_zherfs.c + lapacke_zherfs_work.c + lapacke_zhesv.c + lapacke_zhesv_work.c + lapacke_zhesvx.c + lapacke_zhesvx_work.c + lapacke_zheswapr.c + lapacke_zheswapr_work.c + lapacke_zhetrd.c + lapacke_zhetrd_work.c + lapacke_zhetrf.c + lapacke_zhetrf_work.c + lapacke_zhetri.c + lapacke_zhetri2.c + lapacke_zhetri2_work.c + lapacke_zhetri2x.c + lapacke_zhetri2x_work.c + lapacke_zhetri_work.c + lapacke_zhetrs.c + lapacke_zhetrs2.c + lapacke_zhetrs2_work.c + lapacke_zhetrs_work.c + lapacke_zhfrk.c + lapacke_zhfrk_work.c + lapacke_zhgeqz.c + lapacke_zhgeqz_work.c + lapacke_zhpcon.c + lapacke_zhpcon_work.c + lapacke_zhpev.c + lapacke_zhpev_work.c + lapacke_zhpevd.c + lapacke_zhpevd_work.c + lapacke_zhpevx.c + lapacke_zhpevx_work.c + lapacke_zhpgst.c + lapacke_zhpgst_work.c + lapacke_zhpgv.c + lapacke_zhpgv_work.c + lapacke_zhpgvd.c + lapacke_zhpgvd_work.c + lapacke_zhpgvx.c + lapacke_zhpgvx_work.c + lapacke_zhprfs.c + lapacke_zhprfs_work.c + lapacke_zhpsv.c + lapacke_zhpsv_work.c + lapacke_zhpsvx.c + lapacke_zhpsvx_work.c + lapacke_zhptrd.c + lapacke_zhptrd_work.c + lapacke_zhptrf.c + lapacke_zhptrf_work.c + lapacke_zhptri.c + lapacke_zhptri_work.c + lapacke_zhptrs.c + lapacke_zhptrs_work.c + 
lapacke_zhsein.c + lapacke_zhsein_work.c + lapacke_zhseqr.c + lapacke_zhseqr_work.c + lapacke_zlacgv.c + lapacke_zlacgv_work.c + lapacke_zlacn2.c + lapacke_zlacn2_work.c + lapacke_zlacp2.c + lapacke_zlacp2_work.c + lapacke_zlacpy.c + lapacke_zlacpy_work.c + lapacke_zlag2c.c + lapacke_zlag2c_work.c + lapacke_zlange.c + lapacke_zlange_work.c + lapacke_zlanhe.c + lapacke_zlanhe_work.c + lapacke_zlansy.c + lapacke_zlansy_work.c + lapacke_zlantr.c + lapacke_zlantr_work.c + lapacke_zlapmr.c + lapacke_zlapmr_work.c + lapacke_zlarfb.c + lapacke_zlarfb_work.c + lapacke_zlarfg.c + lapacke_zlarfg_work.c + lapacke_zlarft.c + lapacke_zlarft_work.c + lapacke_zlarfx.c + lapacke_zlarfx_work.c + lapacke_zlarnv.c + lapacke_zlarnv_work.c + lapacke_zlaset.c + lapacke_zlaset_work.c + lapacke_zlaswp.c + lapacke_zlaswp_work.c + lapacke_zlauum.c + lapacke_zlauum_work.c + lapacke_zpbcon.c + lapacke_zpbcon_work.c + lapacke_zpbequ.c + lapacke_zpbequ_work.c + lapacke_zpbrfs.c + lapacke_zpbrfs_work.c + lapacke_zpbstf.c + lapacke_zpbstf_work.c + lapacke_zpbsv.c + lapacke_zpbsv_work.c + lapacke_zpbsvx.c + lapacke_zpbsvx_work.c + lapacke_zpbtrf.c + lapacke_zpbtrf_work.c + lapacke_zpbtrs.c + lapacke_zpbtrs_work.c + lapacke_zpftrf.c + lapacke_zpftrf_work.c + lapacke_zpftri.c + lapacke_zpftri_work.c + lapacke_zpftrs.c + lapacke_zpftrs_work.c + lapacke_zpocon.c + lapacke_zpocon_work.c + lapacke_zpoequ.c + lapacke_zpoequ_work.c + lapacke_zpoequb.c + lapacke_zpoequb_work.c + lapacke_zporfs.c + lapacke_zporfs_work.c + lapacke_zposv.c + lapacke_zposv_work.c + lapacke_zposvx.c + lapacke_zposvx_work.c + lapacke_zpotrf.c + lapacke_zpotrf_work.c + lapacke_zpotri.c + lapacke_zpotri_work.c + lapacke_zpotrs.c + lapacke_zpotrs_work.c + lapacke_zppcon.c + lapacke_zppcon_work.c + lapacke_zppequ.c + lapacke_zppequ_work.c + lapacke_zpprfs.c + lapacke_zpprfs_work.c + lapacke_zppsv.c + lapacke_zppsv_work.c + lapacke_zppsvx.c + lapacke_zppsvx_work.c + lapacke_zpptrf.c + lapacke_zpptrf_work.c + lapacke_zpptri.c + 
lapacke_zpptri_work.c + lapacke_zpptrs.c + lapacke_zpptrs_work.c + lapacke_zpstrf.c + lapacke_zpstrf_work.c + lapacke_zptcon.c + lapacke_zptcon_work.c + lapacke_zpteqr.c + lapacke_zpteqr_work.c + lapacke_zptrfs.c + lapacke_zptrfs_work.c + lapacke_zptsv.c + lapacke_zptsv_work.c + lapacke_zptsvx.c + lapacke_zptsvx_work.c + lapacke_zpttrf.c + lapacke_zpttrf_work.c + lapacke_zpttrs.c + lapacke_zpttrs_work.c + lapacke_zspcon.c + lapacke_zspcon_work.c + lapacke_zsprfs.c + lapacke_zsprfs_work.c + lapacke_zspsv.c + lapacke_zspsv_work.c + lapacke_zspsvx.c + lapacke_zspsvx_work.c + lapacke_zsptrf.c + lapacke_zsptrf_work.c + lapacke_zsptri.c + lapacke_zsptri_work.c + lapacke_zsptrs.c + lapacke_zsptrs_work.c + lapacke_zstedc.c + lapacke_zstedc_work.c + lapacke_zstegr.c + lapacke_zstegr_work.c + lapacke_zstein.c + lapacke_zstein_work.c + lapacke_zstemr.c + lapacke_zstemr_work.c + lapacke_zsteqr.c + lapacke_zsteqr_work.c + lapacke_zsycon.c + lapacke_zsycon_work.c + lapacke_zsyconv.c + lapacke_zsyconv_work.c + lapacke_zsyequb.c + lapacke_zsyequb_work.c + lapacke_zsyrfs.c + lapacke_zsyrfs_work.c + lapacke_zsysv.c + lapacke_zsysv_rook.c + lapacke_zsysv_rook_work.c + lapacke_zsysv_work.c + lapacke_zsysvx.c + lapacke_zsysvx_work.c + lapacke_zsyswapr.c + lapacke_zsyswapr_work.c + lapacke_zsytrf.c + lapacke_zsytrf_work.c + lapacke_zsytri.c + lapacke_zsytri2.c + lapacke_zsytri2_work.c + lapacke_zsytri2x.c + lapacke_zsytri2x_work.c + lapacke_zsytri_work.c + lapacke_zsytrs.c + lapacke_zsytrs2.c + lapacke_zsytrs2_work.c + lapacke_zsytrs_work.c + lapacke_ztbcon.c + lapacke_ztbcon_work.c + lapacke_ztbrfs.c + lapacke_ztbrfs_work.c + lapacke_ztbtrs.c + lapacke_ztbtrs_work.c + lapacke_ztfsm.c + lapacke_ztfsm_work.c + lapacke_ztftri.c + lapacke_ztftri_work.c + lapacke_ztfttp.c + lapacke_ztfttp_work.c + lapacke_ztfttr.c + lapacke_ztfttr_work.c + lapacke_ztgevc.c + lapacke_ztgevc_work.c + lapacke_ztgexc.c + lapacke_ztgexc_work.c + lapacke_ztgsen.c + lapacke_ztgsen_work.c + lapacke_ztgsja.c + 
lapacke_ztgsja_work.c + lapacke_ztgsna.c + lapacke_ztgsna_work.c + lapacke_ztgsyl.c + lapacke_ztgsyl_work.c + lapacke_ztpcon.c + lapacke_ztpcon_work.c + lapacke_ztpmqrt.c + lapacke_ztpmqrt_work.c + lapacke_ztpqrt.c + lapacke_ztpqrt2.c + lapacke_ztpqrt2_work.c + lapacke_ztpqrt_work.c + lapacke_ztprfb.c + lapacke_ztprfb_work.c + lapacke_ztprfs.c + lapacke_ztprfs_work.c + lapacke_ztptri.c + lapacke_ztptri_work.c + lapacke_ztptrs.c + lapacke_ztptrs_work.c + lapacke_ztpttf.c + lapacke_ztpttf_work.c + lapacke_ztpttr.c + lapacke_ztpttr_work.c + lapacke_ztrcon.c + lapacke_ztrcon_work.c + lapacke_ztrevc.c + lapacke_ztrevc_work.c + lapacke_ztrexc.c + lapacke_ztrexc_work.c + lapacke_ztrrfs.c + lapacke_ztrrfs_work.c + lapacke_ztrsen.c + lapacke_ztrsen_work.c + lapacke_ztrsna.c + lapacke_ztrsna_work.c + lapacke_ztrsyl.c + lapacke_ztrsyl_work.c + lapacke_ztrtri.c + lapacke_ztrtri_work.c + lapacke_ztrtrs.c + lapacke_ztrtrs_work.c + lapacke_ztrttf.c + lapacke_ztrttf_work.c + lapacke_ztrttp.c + lapacke_ztrttp_work.c + lapacke_ztzrzf.c + lapacke_ztzrzf_work.c + lapacke_zunbdb.c + lapacke_zunbdb_work.c + lapacke_zuncsd.c + lapacke_zuncsd_work.c + lapacke_zungbr.c + lapacke_zungbr_work.c + lapacke_zunghr.c + lapacke_zunghr_work.c + lapacke_zunglq.c + lapacke_zunglq_work.c + lapacke_zungql.c + lapacke_zungql_work.c + lapacke_zungqr.c + lapacke_zungqr_work.c + lapacke_zungrq.c + lapacke_zungrq_work.c + lapacke_zungtr.c + lapacke_zungtr_work.c + lapacke_zunmbr.c + lapacke_zunmbr_work.c + lapacke_zunmhr.c + lapacke_zunmhr_work.c + lapacke_zunmlq.c + lapacke_zunmlq_work.c + lapacke_zunmql.c + lapacke_zunmql_work.c + lapacke_zunmqr.c + lapacke_zunmqr_work.c + lapacke_zunmrq.c + lapacke_zunmrq_work.c + lapacke_zunmrz.c + lapacke_zunmrz_work.c + lapacke_zunmtr.c + lapacke_zunmtr_work.c + lapacke_zupgtr.c + lapacke_zupgtr_work.c + lapacke_zupmtr.c + lapacke_zupmtr_work.c + lapacke_zsyr.c + lapacke_csyr.c + lapacke_zsyr_work.c + lapacke_csyr_work.c + lapacke_ilaver.c +) + +set(SRCX + 
lapacke_cgbrfsx.c lapacke_cporfsx.c lapacke_dgerfsx.c lapacke_sgbrfsx.c lapacke_ssyrfsx.c lapacke_zherfsx.c + lapacke_cgbrfsx_work.c lapacke_cporfsx_work.c lapacke_dgerfsx_work.c lapacke_sgbrfsx_work.c lapacke_ssyrfsx_work.c lapacke_zherfsx_work.c + lapacke_cgerfsx.c lapacke_csyrfsx.c lapacke_dporfsx.c lapacke_sgerfsx.c lapacke_zgbrfsx.c lapacke_zporfsx.c + lapacke_cgerfsx_work.c lapacke_csyrfsx_work.c lapacke_dporfsx_work.c lapacke_sgerfsx_work.c lapacke_zgbrfsx_work.c lapacke_zporfsx_work.c + lapacke_cherfsx.c lapacke_dgbrfsx.c lapacke_dsyrfsx.c lapacke_sporfsx.c lapacke_zgerfsx.c lapacke_zsyrfsx.c + lapacke_cherfsx_work.c lapacke_dgbrfsx_work.c lapacke_dsyrfsx_work.c lapacke_sporfsx_work.c lapacke_zgerfsx_work.c lapacke_zsyrfsx_work.c + lapacke_cgbsvxx.c lapacke_cposvxx.c lapacke_dgesvxx.c lapacke_sgbsvxx.c lapacke_ssysvxx.c lapacke_zhesvxx.c + lapacke_cgbsvxx_work.c lapacke_cposvxx_work.c lapacke_dgesvxx_work.c lapacke_sgbsvxx_work.c lapacke_ssysvxx_work.c lapacke_zhesvxx_work.c + lapacke_cgesvxx.c lapacke_csysvxx.c lapacke_dposvxx.c lapacke_sgesvxx.c lapacke_zgbsvxx.c lapacke_zposvxx.c + lapacke_cgesvxx_work.c lapacke_csysvxx_work.c lapacke_dposvxx_work.c lapacke_sgesvxx_work.c lapacke_zgbsvxx_work.c lapacke_zposvxx_work.c + lapacke_chesvxx.c lapacke_dgbsvxx.c lapacke_dsysvxx.c lapacke_sposvxx.c lapacke_zgesvxx.c lapacke_zsysvxx.c + lapacke_chesvxx_work.c lapacke_dgbsvxx_work.c lapacke_dsysvxx_work.c lapacke_sposvxx_work.c lapacke_zgesvxx_work.c lapacke_zsysvxx_work.c +) + + +# FILE PARTS OF TMGLIB +set(MATGEN + lapacke_clatms.c + lapacke_clatms_work.c + lapacke_dlatms.c + lapacke_dlatms_work.c + lapacke_slatms.c + lapacke_slatms_work.c + lapacke_zlatms.c + lapacke_zlatms_work.c + lapacke_clagge.c + lapacke_clagge_work.c + lapacke_dlagge.c + lapacke_dlagge_work.c + lapacke_slagge.c + lapacke_slagge_work.c + lapacke_zlagge.c + lapacke_zlagge_work.c + lapacke_claghe.c + lapacke_claghe_work.c + lapacke_zlaghe.c + lapacke_zlaghe_work.c + lapacke_clagsy.c + 
lapacke_clagsy_work.c + lapacke_dlagsy.c + lapacke_dlagsy_work.c + lapacke_slagsy.c + lapacke_slagsy_work.c + lapacke_zlagsy.c + lapacke_zlagsy_work.c +) + +set(LAPACKE_REL_SRC "") +if (BUILD_SINGLE) + list(APPEND LAPACKE_REL_SRC ${SSRC}) +endif () + +if (BUILD_DOUBLE) + list(APPEND LAPACKE_REL_SRC ${DSRC}) +endif () + +if (BUILD_COMPLEX) + list(APPEND LAPACKE_REL_SRC ${CSRC}) +endif () + +if (BUILD_COMPLEX16) + list(APPEND LAPACKE_REL_SRC ${ZSRC}) +endif () + +# add lapack-netlib folder to the sources +set(LAPACKE_SOURCES "") +foreach (LAE_FILE ${LAPACKE_REL_SRC}) + list(APPEND LAPACKE_SOURCES "${NETLIB_LAPACK_DIR}/lapacke/SRC/${LAE_FILE}") +endforeach () + +set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/lapacke/include") +execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${lapacke_include_dir}/lapacke_mangling_with_flags.h" "${lapacke_include_dir}/lapacke_mangling.h") +include_directories(${lapacke_include_dir}) +set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}") From f874465bb81d10e7cdb88a10cff7d62df3fe370c Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 10 Aug 2015 14:10:44 -0500 Subject: [PATCH 111/137] Use cmake to build OpenBLAS GENERIC Target on MSVC x86 64-bit. Disable CBLAS and LAPACK. 
--- CMakeLists.txt | 20 ++++++++--- cmake/export.cmake | 60 +++++++++++++++++++++++++++++++ cmake/f_check.cmake | 3 ++ cmake/kernel.cmake | 15 +++++--- cmake/prebuild.cmake | 9 +++-- cmake/system.cmake | 15 ++++++++ cmake/utils.cmake | 4 +++ common.h | 45 ++++++++++++++++++----- common_x86_64.h | 35 +++++++++++++++--- driver/level2/CMakeLists.txt | 59 ++++++++++++++++++++++++++++++ driver/level2/gbmv_thread.c | 2 +- driver/level2/sbmv_thread.c | 2 +- driver/level2/spmv_thread.c | 2 +- driver/level2/tbmv_thread.c | 2 +- driver/level2/tpmv_thread.c | 2 +- driver/level2/trmv_thread.c | 2 +- driver/level2/zgbmv_k.c | 2 +- driver/level2/zhbmv_k.c | 10 +++--- driver/level2/zhpmv_k.c | 10 +++--- driver/level2/zsbmv_k.c | 6 ++-- driver/level2/zspmv_k.c | 3 +- driver/level2/ztbmv_L.c | 2 +- driver/level2/ztbmv_U.c | 2 +- driver/level2/ztbsv_L.c | 2 +- driver/level2/ztbsv_U.c | 2 +- driver/level2/ztpmv_L.c | 2 +- driver/level2/ztpmv_U.c | 2 +- driver/level2/ztpsv_L.c | 2 +- driver/level2/ztpsv_U.c | 2 +- driver/level2/ztrmv_L.c | 2 +- driver/level2/ztrmv_U.c | 2 +- driver/level2/ztrsv_L.c | 2 +- driver/level2/ztrsv_U.c | 2 +- driver/level3/CMakeLists.txt | 37 ++++++++++++++----- driver/others/CMakeLists.txt | 2 ++ interface/CMakeLists.txt | 39 ++++++++++++++++++-- interface/rotg.c | 3 +- interface/zaxpby.c | 4 +-- interface/zdot.c | 24 +++++++------ interface/zgemv.c | 17 +++++---- interface/zrotg.c | 20 +++++++---- kernel/CMakeLists.txt | 70 ++++++++++++++++++++++++++++++------ kernel/Makefile.L3 | 2 +- kernel/arm/zaxpby.c | 7 ++-- kernel/arm/zaxpy.c | 6 ++-- kernel/arm/zcopy.c | 6 ++-- kernel/arm/zdot.c | 18 +++++----- kernel/arm/zrot.c | 6 ++-- kernel/arm/zswap.c | 6 ++-- kernel/x86_64/KERNEL.generic | 6 ++++ openblas_config_template.h | 3 +- 51 files changed, 488 insertions(+), 120 deletions(-) create mode 100644 cmake/export.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d2e5d3c6..610cc9c90 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,11 +15,13 @@ 
enable_language(C) set(OpenBLAS_LIBNAME openblas) ####### -option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS and CBLAS)" ON) +option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON) +option(BUILD_WITHOUT_CBLAS "Without CBLAS" ON) option(BUILD_DEBUG "Build Debug Version" OFF) ####### if(BUILD_WITHOUT_LAPACK) set(NO_LAPACK 1) +set(NO_LAPACKE 1) endif() if(BUILD_DEBUG) @@ -27,6 +29,11 @@ set(CMAKE_BUILD_TYPE Debug) else() set(CMAKE_BUILD_TYPE Release) endif() + +if(BUILD_WITHOUT_CBLAS) +set(NO_CBLAS 1) +endif() + ####### @@ -51,7 +58,6 @@ endif () set(SUBDIRS ${BLASDIRS}) if (NOT NO_LAPACK) - message ("error 1") list(APPEND SUBDIRS lapack) endif () @@ -111,15 +117,21 @@ endforeach () # Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. # Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. 
if (NOT NOFORTRAN AND NOT NO_LAPACK) - message ("error 2") include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake") if (NOT NO_LAPACKE) include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake") endif () endif () +#Only generate .def for dll on MSVC +if(MSVC) +set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def") +endif() + # add objects to the openblas lib -add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS}) +add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) # OpenBLAS_DEF_FILE is only set on MSVC; elsewhere no .def source is added + +include("${CMAKE_SOURCE_DIR}/cmake/export.cmake") #only build shared library for MSVC if(NOT MSVC) diff --git a/cmake/export.cmake b/cmake/export.cmake new file mode 100644 index 000000000..adf59101f --- /dev/null +++ b/cmake/export.cmake @@ -0,0 +1,60 @@ + +#Only generate .def for dll on MSVC +if(MSVC) + +set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1) + +if (NOT DEFINED ARCH) + set(ARCH_IN "x86_64") +else() + set(ARCH_IN ${ARCH}) +endif() + +if ("${CORE}" STREQUAL "generic") # quoted so an unset/empty CORE is not an if() syntax error + set(ARCH_IN "GENERIC") +endif () + +if (NOT DEFINED EXPRECISION) + set(EXPRECISION_IN 0) +else() + set(EXPRECISION_IN ${EXPRECISION}) +endif() + +if (NOT DEFINED NO_CBLAS) + set(NO_CBLAS_IN 0) +else() + set(NO_CBLAS_IN ${NO_CBLAS}) +endif() + +if (NOT DEFINED NO_LAPACK) + set(NO_LAPACK_IN 0) +else() + set(NO_LAPACK_IN ${NO_LAPACK}) +endif() + +if (NOT DEFINED NO_LAPACKE) + set(NO_LAPACKE_IN 0) +else() + set(NO_LAPACKE_IN ${NO_LAPACKE}) +endif() + +if (NOT DEFINED NEED2UNDERSCORES) + set(NEED2UNDERSCORES_IN 0) +else() + set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES}) +endif() + +if (NOT DEFINED ONLY_CBLAS) + set(ONLY_CBLAS_IN 0) +else() + set(ONLY_CBLAS_IN ${ONLY_CBLAS}) +endif() + +add_custom_command( + TARGET ${OpenBLAS_LIBNAME} PRE_LINK + COMMAND perl + ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" 
"${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def" + COMMENT "Create openblas.def file" + VERBATIM) + +endif() \ No newline at end of file diff --git a/cmake/f_check.cmake b/cmake/f_check.cmake index f7651db56..e189b683a 100644 --- a/cmake/f_check.cmake +++ b/cmake/f_check.cmake @@ -25,7 +25,10 @@ if (MSVC) include(CMakeForceCompiler) CMAKE_FORCE_Fortran_COMPILER(gfortran GNU) endif () + +if (NOT NO_LAPACK) enable_language(Fortran) +endif() if (NOT ONLY_CBLAS) # N.B. f_check is not cross-platform, so instead try to use CMake variables diff --git a/cmake/kernel.cmake b/cmake/kernel.cmake index 3a4d13837..c2ee62545 100644 --- a/cmake/kernel.cmake +++ b/cmake/kernel.cmake @@ -99,10 +99,10 @@ macro(SetDefaultL1) set(QGEMVTKERNEL gemv_t.S) set(XGEMVNKERNEL zgemv_n.S) set(XGEMVTKERNEL zgemv_t.S) - set(SCABS_KERNEL cabs.S) - set(DCABS_KERNEL cabs.S) - set(QCABS_KERNEL cabs.S) - set(LSAME_KERNEL lsame.S) + set(SCABS_KERNEL ../generic/cabs.c) + set(DCABS_KERNEL ../generic/cabs.c) + set(QCABS_KERNEL ../generic/cabs.c) + set(LSAME_KERNEL ../generic/lsame.c) set(SAXPBYKERNEL ../arm/axpby.c) set(DAXPBYKERNEL ../arm/axpby.c) set(CAXPBYKERNEL ../arm/zaxpby.c) @@ -156,3 +156,10 @@ macro(SetDefaultL2) set(XHEMV_V_KERNEL ../generic/zhemv_k.c) set(XHEMV_M_KERNEL ../generic/zhemv_k.c) endmacro () + +macro(SetDefaultL3) + set(SGEADD_KERNEL ../generic/geadd.c) + set(DGEADD_KERNEL ../generic/geadd.c) + set(CGEADD_KERNEL ../generic/zgeadd.c) + set(ZGEADD_KERNEL ../generic/zgeadd.c) +endmacro () \ No newline at end of file diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 901c237c4..c3fa48655 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -66,6 +66,11 @@ if (NOT MSVC) list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S) endif () +if (MSVC) +#Use generic for MSVC now +set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC) +endif() + set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build") 
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") file(MAKE_DIRECTORY ${GETARCH_DIR}) @@ -73,7 +78,7 @@ try_compile(GETARCH_RESULT ${GETARCH_DIR} SOURCES ${GETARCH_SRC} COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE GETARCH_LOG - COPY_FILE ${GETARCH_BIN} + COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN} ) message(STATUS "Running getarch") @@ -95,7 +100,7 @@ try_compile(GETARCH2_RESULT ${GETARCH2_DIR} SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR} OUTPUT_VARIABLE GETARCH2_LOG - COPY_FILE ${GETARCH2_BIN} + COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} ) # use the cmake binary w/ the -E param to run a shell command in a cross-platform way diff --git a/cmake/system.cmake b/cmake/system.cmake index 36f9b7cbd..8ec738a10 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -420,6 +420,21 @@ if (ONLY_CBLAS) set(LIB_COMPONENTS CBLAS) endif () + +# For GEMM3M: enabled for the architectures listed below, never for the generic core +set(USE_GEMM3M 0) + +if (DEFINED ARCH) + if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64" OR "${ARCH}" STREQUAL "ia64" OR "${ARCH}" STREQUAL "MIPS") + set(USE_GEMM3M 1) + endif () + + if ("${CORE}" STREQUAL "generic") # quoted so an unset/empty CORE is not an if() syntax error + set(USE_GEMM3M 0) + endif () +endif () + + #export OSNAME #export ARCH #export CORE diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 498c3840a..6e2a98069 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -102,6 +102,7 @@ endfunction () # 1 - compiles the sources for non-complex types only (SINGLE/DOUBLE) # 2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX) # 3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c) +# 4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c) # STRING - compiles only the given type (e.g. 
DOUBLE) function(GenerateNamedObjects sources_in) @@ -151,6 +152,9 @@ function(GenerateNamedObjects sources_in) set(complex_only true) elseif (${ARGV7} EQUAL 3) set(mangle_complex_sources true) + elseif (${ARGV7} EQUAL 4) + set(mangle_complex_sources true) + set(complex_only true) elseif (NOT ${ARGV7} EQUAL 0) set(float_list ${ARGV7}) endif () diff --git a/common.h b/common.h index 1894a5c86..1fb2c7eaf 100644 --- a/common.h +++ b/common.h @@ -296,13 +296,6 @@ typedef int blasint; #define COMPSIZE 2 #endif -#if defined(C_PGI) || defined(C_SUN) -#define CREAL(X) (*((FLOAT *)&X + 0)) -#define CIMAG(X) (*((FLOAT *)&X + 1)) -#else -#define CREAL __real__ -#define CIMAG __imag__ -#endif #define Address_H(x) (((x)+(1<<15))>>16) #define Address_L(x) ((x)-((Address_H(x))<<16)) @@ -464,17 +457,49 @@ typedef char* env_var_t; extension since version 3.0. If neither are available, use a compatible structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ - (__GNUC__ >= 3 && !defined(__cplusplus))) + (__GNUC__ >= 3 && !defined(__cplusplus)) || \ + _MSC_VER >= 1800) // Visual Studio 2013 supports complex #define OPENBLAS_COMPLEX_C99 typedef float _Complex openblas_complex_float; typedef double _Complex openblas_complex_double; typedef xdouble _Complex openblas_complex_xdouble; + #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I)) + #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I)) + #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I)) #else #define OPENBLAS_COMPLEX_STRUCT typedef struct { float real, imag; } openblas_complex_float; typedef struct { double real, imag; } openblas_complex_double; typedef struct { xdouble real, imag; } openblas_complex_xdouble; + #define openblas_make_complex_float(real, imag) {(real), (imag)} + #define openblas_make_complex_double(real, imag) {(real), (imag)} + #define 
openblas_make_complex_xdouble(real, imag) {(real), (imag)} #endif + +#ifdef XDOUBLE +#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble +#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i) +#elif defined(DOUBLE) +#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double +#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i) +#else +#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float +#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i) +#endif + +#if defined(C_PGI) || defined(C_SUN) +#define CREAL(X) (*((FLOAT *)&X + 0)) +#define CIMAG(X) (*((FLOAT *)&X + 1)) +#else +#ifdef OPENBLAS_COMPLEX_STRUCT +#define CREAL(Z) ((Z).real) +#define CIMAG(Z) ((Z).imag) +#else +#define CREAL __real__ +#define CIMAG __imag__ +#endif +#endif + #endif // ASSEMBLER #ifndef IFLUSH @@ -491,6 +516,10 @@ typedef char* env_var_t; #endif #endif +#if defined(C_MSVC) +#define inline __inline +#endif + #ifndef ASSEMBLER #ifndef MIN diff --git a/common_x86_64.h b/common_x86_64.h index efb902416..8bb87c7c0 100644 --- a/common_x86_64.h +++ b/common_x86_64.h @@ -41,6 +41,10 @@ #ifndef ASSEMBLER +#ifdef C_MSVC +#include +#endif + #ifdef C_SUN #define __asm__ __asm #define __volatile__ @@ -61,30 +65,39 @@ static void __inline blas_lock(volatile BLASULONG *address){ - int ret; + BLASULONG ret; do { while (*address) {YIELDING;}; +#ifndef C_MSVC __asm__ __volatile__( "xchgl %0, %1\n" : "=r"(ret), "=m"(*address) : "0"(1), "m"(*address) : "memory"); - +#else + ret=InterlockedExchange64((volatile LONG64 *)(address), 1); +#endif } while (ret); + } static __inline BLASULONG rpcc(void){ +#ifdef C_MSVC + return __rdtsc(); +#else BLASULONG a, d; __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); return ((BLASULONG)a + ((BLASULONG)d << 32)); +#endif } #define RPCC64BIT +#ifndef C_MSVC static __inline BLASULONG getstackaddr(void){ BLASULONG addr; @@ -93,22 +106,32 @@ static __inline BLASULONG getstackaddr(void){ return addr; } +#endif static 
__inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ +#ifdef C_MSVC + int cpuinfo[4]; + __cpuid(cpuinfo, op); + *eax=cpuinfo[0]; + *ebx=cpuinfo[1]; + *ecx=cpuinfo[2]; + *edx=cpuinfo[3]; +#else __asm__ __volatile__("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op)); +#endif } /* #define WHEREAMI */ -static inline int WhereAmI(void){ +static __inline int WhereAmI(void){ int eax, ebx, ecx, edx; int apicid; @@ -150,10 +173,14 @@ static inline int WhereAmI(void){ #define GET_IMAGE_CANCEL #ifdef SMP -#ifdef USE64BITINT +#if defined(USE64BITINT) static __inline blasint blas_quickdivide(blasint x, blasint y){ return x / y; } +#elif defined (C_MSVC) +static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){ + return x / y; +} #else extern unsigned int blas_quick_divide_table[]; diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index e4440be6d..5db4fb5ee 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -46,12 +46,28 @@ set(NU_SMP_SOURCES tbmv_thread.c ) +set(ULVM_COMPLEX_SOURCES + hbmv_k.c + hpmv_k.c + hpr_k.c + hpr2_k.c + her_k.c + her2_k.c +) + # objects that need LOWER set GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1 "" "" 3) # gbmv uses a lowercase n and t GenerateNamedObjects("gbmv_k.c" "" "gbmv_n" false "" "" "" 3) GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t" false "" "" "" 3) +# c/zgbmv +GenerateNamedObjects("zgbmv_k.c" "CONJ" "gbmv_r" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ" "gbmv_c" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "XCONJ" "gbmv_o" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "TRANS;XCONJ" "gbmv_u" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "CONJ;XCONJ" "gbmv_s" false "" "" "" 2) +GenerateNamedObjects("zgbmv_k.c" "TRANS;CONJ;XCONJ" "gbmv_d" false "" "" "" 2) # special defines for complex foreach (float_type ${FLOAT_TYPES}) @@ -82,6 +98,14 @@ foreach (float_type ${FLOAT_TYPES}) 
GenerateCombinationObjects("z${l_source}" "UNIT" "N" "TRANSA=4" 0 "${op_name}_CU" false ${float_type}) endforeach () + foreach (ulvm_source ${ULVM_COMPLEX_SOURCES}) + string(REGEX MATCH "[a-z0-9]+" op_name ${ulvm_source}) + GenerateNamedObjects("z${ulvm_source}" "" "${op_name}_U" false "" "" false ${float_type}) + GenerateNamedObjects("z${ulvm_source}" "LOWER" "${op_name}_L" false "" "" false ${float_type}) + GenerateNamedObjects("z${ulvm_source}" "HEMVREV" "${op_name}_V" false "" "" false ${float_type}) + GenerateNamedObjects("z${ulvm_source}" "LOWER;HEMVREV" "${op_name}_M" false "" "" false ${float_type}) + endforeach() + if (SMP) GenerateNamedObjects("gemv_thread.c" "CONJ" "gemv_thread_r" false "" "" false ${float_type}) @@ -103,6 +127,41 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("ger_thread.c" "XCONJ" "ger_thread_V" false "" "" false ${float_type}) GenerateNamedObjects("ger_thread.c" "XCONJ;CONJ" "ger_thread_D" false "" "" false ${float_type}) + GenerateNamedObjects("sbmv_thread.c" "HEMV" "hbmv_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("sbmv_thread.c" "HEMV;LOWER" "hbmv_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("sbmv_thread.c" "HEMVREV" "hbmv_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("sbmv_thread.c" "LOWER;HEMVREV" "hbmv_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("spmv_thread.c" "HEMV" "hpmv_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("spmv_thread.c" "HEMV;LOWER" "hpmv_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("spmv_thread.c" "HEMVREV" "hpmv_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("spmv_thread.c" "LOWER;HEMVREV" "hpmv_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("spr_thread.c" "HEMV" "hpr_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("spr_thread.c" "HEMV;LOWER" "hpr_thread_L" false "" "" false ${float_type}) + 
GenerateNamedObjects("spr_thread.c" "HEMVREV" "hpr_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("spr_thread.c" "LOWER;HEMVREV" "hpr_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("spr2_thread.c" "HEMV" "hpr2_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("spr2_thread.c" "HEMV;LOWER" "hpr2_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("spr2_thread.c" "HEMVREV" "hpr2_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("spr2_thread.c" "LOWER;HEMVREV" "hpr2_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("symv_thread.c" "HEMV" "hemv_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("symv_thread.c" "HEMV;LOWER" "hemv_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("symv_thread.c" "HEMVREV" "hemv_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("symv_thread.c" "LOWER;HEMVREV" "hemv_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("syr_thread.c" "HER" "her_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("syr_thread.c" "HER;LOWER" "her_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("syr_thread.c" "HEMVREV" "her_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("syr_thread.c" "LOWER;HEMVREV" "her_thread_M" false "" "" false ${float_type}) + + GenerateNamedObjects("syr2_thread.c" "HER2" "her2_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("syr2_thread.c" "HER2;LOWER" "her2_thread_L" false "" "" false ${float_type}) + GenerateNamedObjects("syr2_thread.c" "HEMVREV" "her2_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("syr2_thread.c" "LOWER;HEMVREV" "her2_thread_M" false "" "" false ${float_type}) + foreach (nu_smp_src ${NU_SMP_SOURCES}) string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src}) GenerateCombinationObjects("${nu_smp_src}" "LOWER;UNIT" "U;N" "TRANSA=1" 0 "${op_name}_N" false ${float_type}) 
diff --git a/driver/level2/gbmv_thread.c b/driver/level2/gbmv_thread.c index 9efe17092..ef9d58d76 100644 --- a/driver/level2/gbmv_thread.c +++ b/driver/level2/gbmv_thread.c @@ -64,7 +64,7 @@ static int gbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #endif diff --git a/driver/level2/sbmv_thread.c b/driver/level2/sbmv_thread.c index 5b7fc7332..a0377d638 100644 --- a/driver/level2/sbmv_thread.c +++ b/driver/level2/sbmv_thread.c @@ -60,7 +60,7 @@ static int sbmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif a = (FLOAT *)args -> a; diff --git a/driver/level2/spmv_thread.c b/driver/level2/spmv_thread.c index 93a2f44d4..0f47344df 100644 --- a/driver/level2/spmv_thread.c +++ b/driver/level2/spmv_thread.c @@ -60,7 +60,7 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif a = (FLOAT *)args -> a; diff --git a/driver/level2/tbmv_thread.c b/driver/level2/tbmv_thread.c index 3c1249448..bbb1c50eb 100644 --- a/driver/level2/tbmv_thread.c +++ b/driver/level2/tbmv_thread.c @@ -76,7 +76,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #endif diff --git a/driver/level2/tpmv_thread.c b/driver/level2/tpmv_thread.c index 3b91cee45..47dc1daf9 100644 --- a/driver/level2/tpmv_thread.c +++ b/driver/level2/tpmv_thread.c @@ -81,7 +81,7 @@ static int tpmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #endif diff --git a/driver/level2/trmv_thread.c 
b/driver/level2/trmv_thread.c index 29e9799f6..a9dc2dc62 100644 --- a/driver/level2/trmv_thread.c +++ b/driver/level2/trmv_thread.c @@ -87,7 +87,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F #ifndef COMPLEX FLOAT result; #else - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #endif diff --git a/driver/level2/zgbmv_k.c b/driver/level2/zgbmv_k.c index 68d6045bd..d89932e33 100644 --- a/driver/level2/zgbmv_k.c +++ b/driver/level2/zgbmv_k.c @@ -77,7 +77,7 @@ void CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT alpha_r, FLOA FLOAT *bufferY = gemvbuffer; FLOAT *bufferX = gemvbuffer; #ifdef TRANS - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif if (incy != 1) { diff --git a/driver/level2/zhbmv_k.c b/driver/level2/zhbmv_k.c index 70e92e050..33f70d2c5 100644 --- a/driver/level2/zhbmv_k.c +++ b/driver/level2/zhbmv_k.c @@ -56,6 +56,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT *bufferX = sbmvbuffer; FLOAT temp[2]; + OPENBLAS_COMPLEX_FLOAT result; + if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); @@ -93,7 +95,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; if (length > 0) { - FLOAT _Complex result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); + result = DOTC_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -118,7 +120,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; if (length > 0) { - FLOAT _Complex result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); + result = DOTC_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * 
CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -143,7 +145,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; if (length > 0) { - FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); + result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -168,7 +170,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, Y[i * 2 + 1] += alpha_r * temp[1] + alpha_i * temp[0]; if (length > 0) { - FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); + result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); diff --git a/driver/level2/zhpmv_k.c b/driver/level2/zhpmv_k.c index 96bceaaf2..9e7ed7b0e 100644 --- a/driver/level2/zhpmv_k.c +++ b/driver/level2/zhpmv_k.c @@ -51,6 +51,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, FLOAT *bufferX = gemvbuffer; FLOAT temp[2]; + OPENBLAS_COMPLEX_FLOAT result; + if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + m * sizeof(FLOAT) * 2 + 4095) & ~4095); @@ -69,7 +71,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, #ifndef HEMVREV #ifndef LOWER if (i > 0) { - FLOAT _Complex result = DOTC_K(i, a, 1, X, 1); + result = DOTC_K(i, a, 1, X, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -93,7 +95,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, #else if (m - i > 1) { - FLOAT _Complex result = DOTC_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); + result = DOTC_K(m - i - 1, a + (i + 1) * 
2, 1, X + (i + 1) * 2, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -118,7 +120,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, #else #ifndef LOWER if (i > 0) { - FLOAT _Complex result = DOTU_K(i, a, 1, X, 1); + result = DOTU_K(i, a, 1, X, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -142,7 +144,7 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, #else if (m - i > 1) { - FLOAT _Complex result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); + result = DOTU_K(m - i - 1, a + (i + 1) * 2, 1, X + (i + 1) * 2, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); diff --git a/driver/level2/zsbmv_k.c b/driver/level2/zsbmv_k.c index 30e2f91c3..3ae74ce80 100644 --- a/driver/level2/zsbmv_k.c +++ b/driver/level2/zsbmv_k.c @@ -55,6 +55,8 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, FLOAT *bufferY = sbmvbuffer; FLOAT *bufferX = sbmvbuffer; + OPENBLAS_COMPLEX_FLOAT result; + if (incy != 1) { Y = bufferY; bufferX = (FLOAT *)(((BLASLONG)bufferY + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095); @@ -83,7 +85,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, a + offset * COMPSIZE, 1, Y + (i - length) * COMPSIZE, 1, NULL, 0); if (length > 0) { - FLOAT _Complex result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); + result = DOTU_K(length, a + offset * COMPSIZE, 1, X + (i - length) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); @@ -100,7 +102,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, a, 1, Y + i * COMPSIZE, 1, NULL, 0); if (length > 0) { - FLOAT _Complex result = DOTU_K(length, a + COMPSIZE, 
1, X + (i + 1) * COMPSIZE, 1); + result = DOTU_K(length, a + COMPSIZE, 1, X + (i + 1) * COMPSIZE, 1); Y[i * 2 + 0] += alpha_r * CREAL(result) - alpha_i * CIMAG(result); Y[i * 2 + 1] += alpha_r * CIMAG(result) + alpha_i * CREAL(result); diff --git a/driver/level2/zspmv_k.c b/driver/level2/zspmv_k.c index 76657eab9..432205e83 100644 --- a/driver/level2/zspmv_k.c +++ b/driver/level2/zspmv_k.c @@ -49,7 +49,8 @@ int CNAME(BLASLONG m, FLOAT alpha_r, FLOAT alpha_i, FLOAT *gemvbuffer = (FLOAT *)buffer; FLOAT *bufferY = gemvbuffer; FLOAT *bufferX = gemvbuffer; - FLOAT _Complex result; + + OPENBLAS_COMPLEX_FLOAT result; if (incy != 1) { Y = bufferY; diff --git a/driver/level2/ztbmv_L.c b/driver/level2/ztbmv_L.c index 74ff0bce1..1ac1cdef1 100644 --- a/driver/level2/ztbmv_L.c +++ b/driver/level2/ztbmv_L.c @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc FLOAT *B = b; BLASLONG length; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztbmv_U.c b/driver/level2/ztbmv_U.c index 933275de3..9aa203396 100644 --- a/driver/level2/ztbmv_U.c +++ b/driver/level2/ztbmv_U.c @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc FLOAT *B = b; BLASLONG length; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztbsv_L.c b/driver/level2/ztbsv_L.c index 0726bbd16..9aa701841 100644 --- a/driver/level2/ztbsv_L.c +++ b/driver/level2/ztbsv_L.c @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc FLOAT *B = b; BLASLONG length; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level2/ztbsv_U.c 
b/driver/level2/ztbsv_U.c index d022650bc..3722b1f71 100644 --- a/driver/level2/ztbsv_U.c +++ b/driver/level2/ztbsv_U.c @@ -49,7 +49,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc FLOAT *B = b; BLASLONG length; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level2/ztpmv_L.c b/driver/level2/ztpmv_L.c index 12c254c12..47e6df56c 100644 --- a/driver/level2/ztpmv_L.c +++ b/driver/level2/ztpmv_L.c @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ BLASLONG i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztpmv_U.c b/driver/level2/ztpmv_U.c index 59708b8b8..da911fb4e 100644 --- a/driver/level2/ztpmv_U.c +++ b/driver/level2/ztpmv_U.c @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ BLASLONG i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztpsv_L.c b/driver/level2/ztpsv_L.c index 3b8e562ce..a497e42a4 100644 --- a/driver/level2/ztpsv_L.c +++ b/driver/level2/ztpsv_L.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ BLASLONG i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level2/ztpsv_U.c b/driver/level2/ztpsv_U.c index 601ac2f9d..28b824e3a 100644 --- a/driver/level2/ztpsv_U.c +++ b/driver/level2/ztpsv_U.c @@ -44,7 +44,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){ BLASLONG i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #ifndef UNIT FLOAT ar, ai, 
br, bi, ratio, den; diff --git a/driver/level2/ztrmv_L.c b/driver/level2/ztrmv_L.c index 63522cf81..92c86aec2 100644 --- a/driver/level2/ztrmv_L.c +++ b/driver/level2/ztrmv_L.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu BLASLONG i, is, min_i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztrmv_U.c b/driver/level2/ztrmv_U.c index 8a4494fd7..f9671c9d6 100644 --- a/driver/level2/ztrmv_U.c +++ b/driver/level2/ztrmv_U.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu BLASLONG i, is, min_i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex temp; + OPENBLAS_COMPLEX_FLOAT temp; #endif #ifndef UNIT FLOAT atemp1, atemp2, btemp1, btemp2; diff --git a/driver/level2/ztrsv_L.c b/driver/level2/ztrsv_L.c index 90f1c2c7d..dd3b2786e 100644 --- a/driver/level2/ztrsv_L.c +++ b/driver/level2/ztrsv_L.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf BLASLONG i, is, min_i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level2/ztrsv_U.c b/driver/level2/ztrsv_U.c index bec8114f3..8803182a8 100644 --- a/driver/level2/ztrsv_U.c +++ b/driver/level2/ztrsv_U.c @@ -46,7 +46,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buf BLASLONG i, is, min_i; #if (TRANSA == 2) || (TRANSA == 4) - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; #endif #ifndef UNIT FLOAT ar, ai, br, bi, ratio, den; diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 376a0beeb..6d623b0c2 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -1,13 +1,5 @@ include_directories(${CMAKE_SOURCE_DIR}) -set(USE_GEMM3M 0) - -if (DEFINED ARCH) 
- if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS") - set(USE_GEMM3M 1) - endif () -endif () - # N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa # loop through gemm.c defines @@ -54,12 +46,41 @@ foreach (float_type ${FLOAT_TYPES}) GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type}) GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type}) GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type}) + + #hemm + GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN" 0 "hemm_L" false ${float_type}) + GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE" 0 "hemm_R" false ${float_type}) + + #her2k + GenerateCombinationObjects("zher2k_kernel.c" "LOWER;CONJ" "U;N" "" 2 "her2k_kernel" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) + + if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) + #hemm + GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type}) + GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type}) + #her2k + GenerateNamedObjects("zher2k_k.c" "HER2K" "her2k_UN" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;TRANS;CONJ" "her2k_UC" false "" "" false ${float_type}) + GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER" "her2k_LN" false "" "" false ${float_type}) + 
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) + endif() + # special gemm defines for complex foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) string(TOLOWER ${gemm_define} gemm_define_LC) GenerateNamedObjects("gemm.c" "${gemm_define}" "gemm_${gemm_define_LC}" false "" "" false ${float_type}) + if(USE_GEMM3M) + GenerateNamedObjects("gemm3m.c" "${gemm_define}" "gemm3m_${gemm_define_LC}" false "" "" false ${float_type}) + endif() if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false ${float_type}) + if(USE_GEMM3M) + GenerateNamedObjects("gemm3m.c" "${gemm_define};THREADED_LEVEL3" "gemm3m_thread_${gemm_define_LC}" false "" "" false ${float_type}) + endif() endif () endforeach () endif () diff --git a/driver/others/CMakeLists.txt b/driver/others/CMakeLists.txt index 938f1daaf..b2af55e36 100644 --- a/driver/others/CMakeLists.txt +++ b/driver/others/CMakeLists.txt @@ -33,6 +33,8 @@ set(COMMON_SOURCES xerbla.c openblas_set_num_threads.c openblas_error_handle.c + openblas_get_num_procs.c + openblas_get_num_threads.c ) # these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index ae949235b..91565d2f2 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -1,13 +1,16 @@ include_directories(${CMAKE_SOURCE_DIR}) + set(BLAS1_SOURCES copy.c - asum.c nrm2.c + nrm2.c ) set(BLAS1_REAL_ONLY_SOURCES rotm.c rotmg.c # N.B. 
these do not have complex counterparts + rot.c + asum.c ) # these will have 'z' prepended for the complex version @@ -15,7 +18,7 @@ set(BLAS1_MANGLED_SOURCES axpy.c swap.c scal.c dot.c - rot.c rotg.c + rotg.c axpby.c ) @@ -31,6 +34,13 @@ set(BLAS2_SOURCES tpsv.c tpmv.c ) +set(BLAS2_COMPLEX_ONLY_MANGLED_SOURCES + hemv.c hbmv.c + her.c her2.c + hpmv.c hpr.c + hpr2.c +) + # these do not have separate 'z' sources set(BLAS3_SOURCES gemm.c symm.c @@ -39,6 +49,7 @@ set(BLAS3_SOURCES set(BLAS3_MANGLED_SOURCES omatcopy.c imatcopy.c + geadd.c ) # generate the BLAS objs once with and once without cblas @@ -65,9 +76,14 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS}) GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1) GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) + GenerateNamedObjects("${BLAS2_COMPLEX_ONLY_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 4) GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) + #sdsdot, dsdot + GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE") + GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE") + # trmm is trsm with a compiler flag set GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) @@ -86,17 +102,36 @@ endforeach () # complex-specific sources foreach (float_type ${FLOAT_TYPES}) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type}) GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type}) + GenerateNamedObjects("zdot.c" "CONJ" "dotc" false "" "" false ${float_type}) + GenerateNamedObjects("zdot.c" "" "dotu" false "" "" false ${float_type}) + + 
GenerateNamedObjects("symm.c" "HEMM" "hemm" false "" "" false ${float_type}) + GenerateNamedObjects("syrk.c" "HEMM" "herk" false "" "" false ${float_type}) + GenerateNamedObjects("syr2k.c" "HEMM" "her2k" false "" "" false ${float_type}) + + if (USE_GEMM3M) + GenerateNamedObjects("gemm.c" "GEMM3M" "gemm3m" false "" "" false ${float_type}) + endif() endif () if (${float_type} STREQUAL "COMPLEX") GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX") GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX") + GenerateNamedObjects("zrot.c" "" "csrot" false "" "" true "COMPLEX") + GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" false "" "" true "COMPLEX") + GenerateNamedObjects("max.c" "USE_ABS" "scamax" false "" "" true "COMPLEX") + GenerateNamedObjects("asum.c" "" "scasum" false "" "" true "COMPLEX") endif () if (${float_type} STREQUAL "ZCOMPLEX") GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX") GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX") + GenerateNamedObjects("zrot.c" "" "zdrot" false "" "" true "ZCOMPLEX") + GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" false "" "" true "ZCOMPLEX") + GenerateNamedObjects("max.c" "USE_ABS" "dzamax" false "" "" true "ZCOMPLEX") + GenerateNamedObjects("asum.c" "" "dzasum" false "" "" true "ZCOMPLEX") endif () endforeach () diff --git a/interface/rotg.c b/interface/rotg.c index 49088ab02..a0e6efdab 100644 --- a/interface/rotg.c +++ b/interface/rotg.c @@ -14,8 +14,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ #endif - -#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) long double da = *DA; long double db = *DB; diff --git a/interface/zaxpby.c b/interface/zaxpby.c index 9e8324432..1abb24de9 100644 --- a/interface/zaxpby.c +++ b/interface/zaxpby.c @@ -53,13 +53,13 @@ void CNAME(blasint n, FLOAT *ALPHA, 
FLOAT *x, blasint incx, FLOAT *BETA, FLOAT * #endif - if (n <= 0) return; - FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_i = *(ALPHA + 1); FLOAT beta_r = *(BETA + 0); FLOAT beta_i = *(BETA + 1); + if (n <= 0) return; + FUNCTION_PROFILE_START(); if (incx < 0) x -= (n - 1) * incx * 2; diff --git a/interface/zdot.c b/interface/zdot.c index 1380ce292..34dfb731a 100644 --- a/interface/zdot.c +++ b/interface/zdot.c @@ -57,21 +57,25 @@ #ifdef RETURN_BY_STRUCT MYTYPE NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { #elif defined RETURN_BY_STACK -void NAME(FLOAT _Complex *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { +void NAME(OPENBLAS_COMPLEX_FLOAT *result, blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { #else -FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { +OPENBLAS_COMPLEX_FLOAT NAME( blasint *N, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY) { #endif BLASLONG n = *N; BLASLONG incx = *INCX; BLASLONG incy = *INCY; #ifndef RETURN_BY_STACK - FLOAT _Complex ret; + OPENBLAS_COMPLEX_FLOAT ret; #endif #ifdef RETURN_BY_STRUCT MYTYPE myret; #endif +#ifndef RETURN_BY_STRUCT + OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); +#endif + PRINT_DEBUG_NAME; if (n <= 0) { @@ -80,10 +84,10 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, myret.i = 0.; return myret; #elif defined RETURN_BY_STACK - *result = ZERO; + *result = zero; return; #else - return ZERO; + return zero; #endif } @@ -144,21 +148,21 @@ FLOAT _Complex NAME( blasint *N, FLOAT *x, blasint *INCX, #else #ifdef FORCE_USE_STACK -void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT _Complex *result){ +void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT *result){ #else -FLOAT _Complex CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ +OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ 
- FLOAT _Complex ret; + OPENBLAS_COMPLEX_FLOAT ret; #endif PRINT_DEBUG_CNAME; if (n <= 0) { #ifdef FORCE_USE_STACK - *result = ZERO; + *result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); return; #else - return ZERO; + return OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); #endif } diff --git a/interface/zgemv.c b/interface/zgemv.c index 704034aaf..792f799e5 100644 --- a/interface/zgemv.c +++ b/interface/zgemv.c @@ -79,6 +79,9 @@ void NAME(char *TRANS, blasint *M, blasint *N, FLOAT *buffer; #ifdef SMP int nthreads; + int nthreads_max; + int nthreads_avail; + double MNK; #endif int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, @@ -91,14 +94,14 @@ void NAME(char *TRANS, blasint *M, blasint *N, blasint lenx, leny; blasint i; - PRINT_DEBUG_NAME; - FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_i = *(ALPHA + 1); FLOAT beta_r = *(BETA + 0); FLOAT beta_i = *(BETA + 1); + PRINT_DEBUG_NAME; + TOUPPER(trans); info = 0; @@ -153,14 +156,14 @@ void CNAME(enum CBLAS_ORDER order, GEMV_O, GEMV_U, GEMV_S, GEMV_D, }; - PRINT_DEBUG_CNAME; - FLOAT alpha_r = *(ALPHA + 0); FLOAT alpha_i = *(ALPHA + 1); FLOAT beta_r = *(BETA + 0); FLOAT beta_i = *(BETA + 1); + PRINT_DEBUG_CNAME; + trans = -1; info = 0; @@ -234,10 +237,10 @@ void CNAME(enum CBLAS_ORDER order, #ifdef SMP - int nthreads_max = num_cpu_avail(2); - int nthreads_avail = nthreads_max; + nthreads_max = num_cpu_avail(2); + nthreads_avail = nthreads_max; - double MNK = (double) m * (double) n; + MNK = (double) m * (double) n; if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) )) nthreads_max = 1; diff --git a/interface/zrotg.c b/interface/zrotg.c index e9e8a11df..187343d41 100644 --- a/interface/zrotg.c +++ b/interface/zrotg.c @@ -6,13 +6,7 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ - PRINT_DEBUG_NAME; - - IDEBUG_START; - - FUNCTION_PROFILE_START(); - -#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) +#if defined(__i386__) || defined(__x86_64__) || 
defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) long double da_r = *(DA + 0); long double da_i = *(DA + 1); @@ -22,6 +16,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ long double ada = fabs(da_r) + fabs(da_i); + PRINT_DEBUG_NAME; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + if (ada == ZERO) { *C = ZERO; *(S + 0) = ONE; @@ -54,6 +54,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ FLOAT ada = fabs(da_r) + fabs(da_i); FLOAT adb; + PRINT_DEBUG_NAME; + + IDEBUG_START; + + FUNCTION_PROFILE_START(); + if (ada == ZERO) { *C = ZERO; *(S + 0) = ONE; diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index cd71101a5..d2cc77b11 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -17,6 +17,7 @@ endif () SetDefaultL1() SetDefaultL2() +SetDefaultL3() ParseMakefileVars("${KERNELDIR}/KERNEL") ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") @@ -65,8 +66,20 @@ foreach (float_type ${FLOAT_TYPES}) else () GenerateNamedObjects("${KERNELDIR}/${${float_char}DOTKERNEL}" "" "dot_k" false "" "" false ${float_type}) endif () + + if (${float_type} STREQUAL "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "srot_k" false "" "" false ${float_type}) + endif() + if (${float_type} STREQUAL "ZCOMPLEX") + GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "drot_k" false "" "" false ${float_type}) + endif() + endforeach () +#dsdot,sdsdot +GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE") +GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE") + # Makefile.L2 GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) @@ -86,6 +99,12 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false ${float_type}) 
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false ${float_type}) + + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_U_KERNEL}" "HEMV" "hemv_U" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_L_KERNEL}" "HEMV;LOWER" "hemv_L" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_V_KERNEL}" "HEMV;HEMVREV" "hemv_V" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}HEMV_M_KERNEL}" "HEMV;HEMVREV;LOWER" "hemv_M" false "" "" false ${float_type}) + else () GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVNKERNEL}" "" "gemv_n" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) @@ -93,14 +112,9 @@ foreach (float_type ${FLOAT_TYPES}) endforeach () # Makefile.L3 -set(USE_GEMM3M false) set(USE_TRMM false) -if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS") - set(USE_GEMM3M true) -endif () - -if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC") +if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic") set(USE_TRMM true) endif () @@ -155,6 +169,13 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type}) + + #hemm + GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" 
"hemm_iutcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "hemm_iltcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type}) + else () GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) endif () @@ -241,11 +262,40 @@ foreach (float_type ${FLOAT_TYPES}) endif () endif () - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "domatcopy_k_cn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "domatcopy_k_rn" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "domatcopy_k_ct" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "domatcopy_k_rt" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CN}" "" "omatcopy_k_cn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RN}" "ROWM" "omatcopy_k_rn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CT}" "" "omatcopy_k_ct" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RT}" "ROWM" "omatcopy_k_rt" false "" "" false ${float_type}) + if (NOT DEFINED ${float_char}OMATCOPY_CNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CNC ../arm/zomatcopy_cnc.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_RNC ../arm/zomatcopy_rnc.c) + 
endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_CTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_CTC ../arm/zomatcopy_ctc.c) + endif () + endif () + if (NOT DEFINED ${float_char}OMATCOPY_RTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}OMATCOPY_RTC ../arm/zomatcopy_rtc.c) + endif () + endif () + + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CNC}" "CONJ" "omatcopy_k_cnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RNC}" "CONJ;ROWM" "omatcopy_k_rnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_CTC}" "CONJ" "omatcopy_k_ctc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type}) + endif() + + GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) endforeach () # Makefile.LA diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 4ef351de3..60b8fb57f 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -3459,7 +3459,7 @@ ifndef DGEADD_K DGEADD_K = ../generic/geadd.c endif -$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K) +$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K) $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ ifndef CGEADD_K diff --git a/kernel/arm/zaxpby.c b/kernel/arm/zaxpby.c index 2e0c2940d..d9948349d 100644 --- a/kernel/arm/zaxpby.c +++ b/kernel/arm/zaxpby.c @@ -38,13 +38,16 @@ int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FL BLASLONG ix,iy; FLOAT temp; + BLASLONG inc_x2; + BLASLONG inc_y2; + if ( n < 0 ) return(0); ix = 0; iy = 0; - BLASLONG inc_x2 = 2 * inc_x; - BLASLONG inc_y2 = 2 * inc_y; + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; if ( beta_r == 
0.0 && beta_i == 0.0) { diff --git a/kernel/arm/zaxpy.c b/kernel/arm/zaxpy.c index 929ee8b54..1dcaeac27 100644 --- a/kernel/arm/zaxpy.c +++ b/kernel/arm/zaxpy.c @@ -41,6 +41,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, { BLASLONG i=0; BLASLONG ix,iy; + BLASLONG inc_x2; + BLASLONG inc_y2; if ( n < 0 ) return(0); if ( da_r == 0.0 && da_i == 0.0 ) return(0); @@ -48,8 +50,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, ix = 0; iy = 0; - BLASLONG inc_x2 = 2 * inc_x; - BLASLONG inc_y2 = 2 * inc_y; + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; while(i < n) { diff --git a/kernel/arm/zcopy.c b/kernel/arm/zcopy.c index f720d6ee5..07fe584c5 100644 --- a/kernel/arm/zcopy.c +++ b/kernel/arm/zcopy.c @@ -40,11 +40,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) { BLASLONG i=0; BLASLONG ix=0,iy=0; + BLASLONG inc_x2; + BLASLONG inc_y2; if ( n < 0 ) return(0); - BLASLONG inc_x2 = 2 * inc_x; - BLASLONG inc_y2 = 2 * inc_y; + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; while(i < n) { diff --git a/kernel/arm/zdot.c b/kernel/arm/zdot.c index 198104022..57f47e58e 100644 --- a/kernel/arm/zdot.c +++ b/kernel/arm/zdot.c @@ -40,24 +40,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) #else -openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) #endif { BLASLONG i=0; BLASLONG ix=0,iy=0; FLOAT dot[2]; - FLOAT _Complex result; + OPENBLAS_COMPLEX_FLOAT result; + BLASLONG inc_x2; + BLASLONG inc_y2; dot[0]=0.0; dot[1]=0.0; - __real__ result = 0.0 ; - __imag__ result = 0.0 ; + CREAL(result) = 0.0 ; + CIMAG(result) = 0.0 ; if ( n < 1 ) return(result); - BLASLONG inc_x2 = 2 * inc_x ; - BLASLONG inc_y2 = 2 * inc_y ; + inc_x2 = 2 * inc_x ; + inc_y2 = 2 * inc_y ; while(i < n) { @@ -73,8 +75,8 @@ openblas_complex_double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BL i++ ; } - __real__ result = dot[0]; - __imag__ result = dot[1]; + CREAL(result) = dot[0]; + CIMAG(result) = dot[1]; return(result); } diff --git a/kernel/arm/zrot.c b/kernel/arm/zrot.c index 356a4df72..98be68db8 100644 --- a/kernel/arm/zrot.c +++ b/kernel/arm/zrot.c @@ -41,11 +41,13 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT BLASLONG i=0; BLASLONG ix=0,iy=0; FLOAT temp[2]; + BLASLONG inc_x2; + BLASLONG inc_y2; if ( n <= 0 ) return(0); - BLASLONG inc_x2 = 2 * inc_x ; - BLASLONG inc_y2 = 2 * inc_y ; + inc_x2 = 2 * inc_x ; + inc_y2 = 2 * inc_y ; while(i < n) { diff --git a/kernel/arm/zswap.c b/kernel/arm/zswap.c index fcfb38506..ae4760ae0 100644 --- a/kernel/arm/zswap.c +++ b/kernel/arm/zswap.c @@ -42,11 +42,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm BLASLONG i=0; BLASLONG ix=0,iy=0; FLOAT temp[2]; + BLASLONG inc_x2; + BLASLONG inc_y2; if ( n < 0 ) return(0); - BLASLONG inc_x2 = 2 * inc_x; - BLASLONG inc_y2 = 2 * inc_y; + inc_x2 = 2 * inc_x; + inc_y2 = 2 * inc_y; while(i < n) { diff --git a/kernel/x86_64/KERNEL.generic b/kernel/x86_64/KERNEL.generic index 672edb069..a23e59f3f 100644 
--- a/kernel/x86_64/KERNEL.generic +++ b/kernel/x86_64/KERNEL.generic @@ -155,5 +155,11 @@ XSYMV_L_KERNEL = ../generic/zsymv_k.c ZHEMV_U_KERNEL = ../generic/zhemv_k.c ZHEMV_L_KERNEL = ../generic/zhemv_k.c +LSAME_KERNEL = ../generic/lsame.c +SCABS_KERNEL = ../generic/cabs.c +DCABS_KERNEL = ../generic/cabs.c +QCABS_KERNEL = ../generic/cabs.c + +#Dump kernel CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c diff --git a/openblas_config_template.h b/openblas_config_template.h index 3b3435b0e..942a8f547 100644 --- a/openblas_config_template.h +++ b/openblas_config_template.h @@ -59,7 +59,8 @@ typedef int blasint; extension since version 3.0. If neither are available, use a compatible structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ - (__GNUC__ >= 3 && !defined(__cplusplus))) + (__GNUC__ >= 3 && !defined(__cplusplus)) || \ + _MSC_VER >= 1800) // Visual Studio 2013 supports complex #define OPENBLAS_COMPLEX_C99 #ifndef __cplusplus #include From f8eba3d548d48d10a39bdc8cce9ab59dba5cda69 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 11 Aug 2015 16:25:16 -0500 Subject: [PATCH 112/137] Fixed cmake build bugs on Linux. 
--- CMakeLists.txt | 21 ++++++++++++++++++--- cmake/c_check.cmake | 9 +++++++-- cmake/kernel.cmake | 4 ++-- cmake/lapacke.cmake | 2 +- common.h | 3 +++ common_x86_64.h | 4 ++++ ctest/CMakeLists.txt | 6 ++++++ driver/level2/CMakeLists.txt | 12 ++++++------ test/CMakeLists.txt | 31 +++++++++++++++++++++++++++++++ 9 files changed, 78 insertions(+), 14 deletions(-) create mode 100644 ctest/CMakeLists.txt create mode 100644 test/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index 610cc9c90..e10df13a3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,8 +15,10 @@ enable_language(C) set(OpenBLAS_LIBNAME openblas) ####### +if(MSVC) option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON) -option(BUILD_WITHOUT_CBLAS "Without CBLAS" ON) +endif() +option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF) option(BUILD_DEBUG "Build Debug Version" OFF) ####### if(BUILD_WITHOUT_LAPACK) @@ -129,15 +131,28 @@ set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def") endif() # add objects to the openblas lib -add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${PROJECT_BINARY_DIR}/openblas.def) +add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) include("${CMAKE_SOURCE_DIR}/cmake/export.cmake") -#only build shared library for MSVC + if(NOT MSVC) +#only build shared library for MSVC add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS}) set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME}) set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1) + +if(SMP) +target_link_libraries(${OpenBLAS_LIBNAME} pthread) +target_link_libraries(${OpenBLAS_LIBNAME}_static pthread) +endif() + +#build test and ctest +enable_testing() +add_subdirectory(test) +if(NOT NO_CBLAS) +add_subdirectory(ctest) +endif() endif() set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES 
diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake index e32c18a43..89ec31446 100644 --- a/cmake/c_check.cmake +++ b/cmake/c_check.cmake @@ -25,9 +25,14 @@ # PTHREAD_CREATE_FUNC # N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables. - -# TODO: detect FU (front underscore) by compiling ctest1.c +set(FU "") +if(APPLE) set(FU "_") +elseif(MSVC) +set(FU "_") +elseif(UNIX) +set(FU "") +endif() # Convert CMake vars into the format that OpenBLAS expects string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS) diff --git a/cmake/kernel.cmake b/cmake/kernel.cmake index c2ee62545..fad84de51 100644 --- a/cmake/kernel.cmake +++ b/cmake/kernel.cmake @@ -100,8 +100,8 @@ macro(SetDefaultL1) set(XGEMVNKERNEL zgemv_n.S) set(XGEMVTKERNEL zgemv_t.S) set(SCABS_KERNEL ../generic/cabs.c) - set(DCABS_KERNEL ../generic/cabs.S) - set(QCABS_KERNEL ../generic/cabs.S) + set(DCABS_KERNEL ../generic/cabs.c) + set(QCABS_KERNEL ../generic/cabs.c) set(LSAME_KERNEL ../generic/lsame.c) set(SAXPBYKERNEL ../arm/axpby.c) set(DAXPBYKERNEL ../arm/axpby.c) diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake index ce7f781dd..39ade0577 100644 --- a/cmake/lapacke.cmake +++ b/cmake/lapacke.cmake @@ -2058,7 +2058,7 @@ endif () # add lapack-netlib folder to the sources set(LAPACKE_SOURCES "") foreach (LAE_FILE ${LAPACKE_REL_SRC}) - list(APPEND LAPACKE_SOURCES "${NETLIB_LAPACK_DIR}/lapacke/SRC/${LAE_FILE}") + list(APPEND LAPACKE_SOURCES "${NETLIB_LAPACK_DIR}/lapacke/src/${LAE_FILE}") endforeach () set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/lapacke/include") diff --git a/common.h b/common.h index 1fb2c7eaf..c87ed6652 100644 --- a/common.h +++ b/common.h @@ -460,6 +460,9 @@ typedef char* env_var_t; (__GNUC__ >= 3 && !defined(__cplusplus)) || \ _MSC_VER >= 1800) // Visual Studio 2013 supports complex #define OPENBLAS_COMPLEX_C99 +#ifndef __cplusplus + #include +#endif typedef float _Complex openblas_complex_float; typedef double _Complex openblas_complex_double; typedef xdouble 
_Complex openblas_complex_xdouble; diff --git a/common_x86_64.h b/common_x86_64.h index 8bb87c7c0..54377695c 100644 --- a/common_x86_64.h +++ b/common_x86_64.h @@ -65,7 +65,11 @@ static void __inline blas_lock(volatile BLASULONG *address){ +#ifndef C_MSVC + int ret; +#else BLASULONG ret; +#endif do { while (*address) {YIELDING;}; diff --git a/ctest/CMakeLists.txt b/ctest/CMakeLists.txt new file mode 100644 index 000000000..d3e15870b --- /dev/null +++ b/ctest/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories(${CMAKE_SOURCE_DIR}) + + +#foreach(test_bin ${OpenBLAS_Tests}) + +#endforeach() \ No newline at end of file diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index 5db4fb5ee..3f40aa47d 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -154,13 +154,13 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("syr_thread.c" "HER" "her_thread_U" false "" "" false ${float_type}) GenerateNamedObjects("syr_thread.c" "HER;LOWER" "her_thread_L" false "" "" false ${float_type}) - GenerateNamedObjects("syr_thread.c" "HEMVREV" "her_thread_V" false "" "" false ${float_type}) - GenerateNamedObjects("syr_thread.c" "LOWER;HEMVREV" "her_thread_M" false "" "" false ${float_type}) + GenerateNamedObjects("syr_thread.c" "HERREV" "her_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("syr_thread.c" "LOWER;HERREV" "her_thread_M" false "" "" false ${float_type}) - GenerateNamedObjects("syr2_thread.c" "HER2" "her2_thread_U" false "" "" false ${float_type}) - GenerateNamedObjects("syr2_thread.c" "HER2;LOWER" "her2_thread_L" false "" "" false ${float_type}) - GenerateNamedObjects("syr2_thread.c" "HEMVREV" "her2_thread_V" false "" "" false ${float_type}) - GenerateNamedObjects("syr2_thread.c" "LOWER;HEMVREV" "her2_thread_M" false "" "" false ${float_type}) + GenerateNamedObjects("syr2_thread.c" "HER" "her2_thread_U" false "" "" false ${float_type}) + GenerateNamedObjects("syr2_thread.c" "HER;LOWER" "her2_thread_L" 
false "" "" false ${float_type}) + GenerateNamedObjects("syr2_thread.c" "HERREV" "her2_thread_V" false "" "" false ${float_type}) + GenerateNamedObjects("syr2_thread.c" "LOWER;HERREV" "her2_thread_M" false "" "" false ${float_type}) foreach (nu_smp_src ${NU_SMP_SOURCES}) string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_src}) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 000000000..446fb8a44 --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,31 @@ +include_directories(${CMAKE_SOURCE_DIR}) + +enable_language(Fortran) + +set(OpenBLAS_Tests + sblat1 sblat2 sblat3 + dblat1 dblat2 dblat3 + cblat1 cblat2 cblat3 + zblat1 zblat2 zblat3) + +foreach(test_bin ${OpenBLAS_Tests}) +add_executable(${test_bin} ${test_bin}.f) +target_link_libraries(${test_bin} ${OpenBLAS_LIBNAME}_static) +endforeach() + +# $1 exec, $2 input, $3 output_result +FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh +"rm -f $3\n" +"$1 < $2" +) + +set(float_types s d c z) +foreach(float_type ${float_types}) +string(TOUPPER ${float_type} float_type_upper) +add_test(NAME "${float_type}blas1" + COMMAND "${CMAKE_CURRENT_BINARY_DIR}/${float_type}blat1") +add_test(NAME "${float_type}blas2" + COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/${float_type}blat2" "${PROJECT_SOURCE_DIR}/test/${float_type}blat2.dat" ${float_type_upper}BLAT2.SUMM) +add_test(NAME "${float_type}blas3" + COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/${float_type}blat3" "${PROJECT_SOURCE_DIR}/test/${float_type}blat3.dat" ${float_type_upper}BLAT3.SUMM) +endforeach() \ No newline at end of file From 17ee2237c382e8ea3f9d3a8aa74aef4a1d12ff17 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 18 Aug 2015 22:43:42 -0500 Subject: [PATCH 113/137] Fixed cmake bug with NO_LAPACK=1 --- cmake/f_check.cmake | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmake/f_check.cmake b/cmake/f_check.cmake index 
e189b683a..e8fe4bfa7 100644 --- a/cmake/f_check.cmake +++ b/cmake/f_check.cmake @@ -27,7 +27,10 @@ if (MSVC) endif () if (NOT NO_LAPACK) -enable_language(Fortran) + enable_language(Fortran) +else() + include(CMakeForceCompiler) + CMAKE_FORCE_Fortran_COMPILER(gfortran GNU) endif() if (NOT ONLY_CBLAS) From f27942a68aced9933761c5d608dfb45e8fd10e8a Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Sat, 26 Sep 2015 14:42:44 +0000 Subject: [PATCH 114/137] Fixed make TARGET=CORTEXA9 and CORTEXA15 bug. --- getarch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/getarch.c b/getarch.c index 89e736a31..c05b90410 100644 --- a/getarch.c +++ b/getarch.c @@ -750,7 +750,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHITECTURE "ARM" #define SUBARCHITECTURE "CORTEXA9" #define SUBDIRNAME "arm" -#define ARCHCONFIG "-DCORTEXA9 " \ +#define ARCHCONFIG "-DCORTEXA9 -DARMV7 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ @@ -765,7 +765,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHITECTURE "ARM" #define SUBARCHITECTURE "CORTEXA15" #define SUBDIRNAME "arm" -#define ARCHCONFIG "-DCORTEXA15 " \ +#define ARCHCONFIG "-DCORTEXA15 -DARMV7 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ From 88bef3bffcfd1715bb26cc410a7d60163dcfd4a1 Mon Sep 17 00:00:00 2001 From: "Steven G. 
Johnson" Date: Thu, 1 Oct 2015 15:07:04 -0400 Subject: [PATCH 115/137] default to lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX), as discussed in #646: if you rename the symbols, it is best to rename the library --- Makefile.rule | 3 +++ Makefile.system | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Makefile.rule b/Makefile.rule index 19f3fe3d9..22f222e3f 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -169,6 +169,9 @@ COMMON_PROF = -pg # 64 bit integer interfaces in OpenBLAS. # For details, https://github.com/xianyi/OpenBLAS/pull/459 # +# The same prefix and suffix are also added to the library name, +# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas +# # SYMBOLPREFIX= # SYMBOLSUFFIX= diff --git a/Makefile.system b/Makefile.system index 325ee6af9..42ad49849 100644 --- a/Makefile.system +++ b/Makefile.system @@ -880,12 +880,6 @@ ifdef USE_SIMPLE_THREADED_LEVEL3 CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3 endif -ifndef LIBNAMESUFFIX -LIBPREFIX = libopenblas -else -LIBPREFIX = libopenblas_$(LIBNAMESUFFIX) -endif - ifndef SYMBOLPREFIX SYMBOLPREFIX = endif @@ -894,6 +888,12 @@ ifndef SYMBOLSUFFIX SYMBOLSUFFIX = endif +ifndef LIBNAMESUFFIX +LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) +else +LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX) +endif + KERNELDIR = $(TOPDIR)/kernel/$(ARCH) include $(TOPDIR)/Makefile.$(ARCH) From e9493f69ebc706a974b1650cded21b70115668de Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 5 Oct 2015 00:58:07 -0400 Subject: [PATCH 116/137] Fix cross compilation suffix detection If the path involves `-`, this would have otherwise detected this as a cross compile suffix. 
--- c_check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_check b/c_check index 0fdadb659..d694e7411 100644 --- a/c_check +++ b/c_check @@ -30,7 +30,7 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) { $cross_suffix = $1; } } else { - if ($ARGV[0] =~ /(.*-)(.*)/) { + if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) { $cross_suffix = $1; } } From 11ac4665c835a27a097e5021074cbf366bcb9765 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 5 Oct 2015 14:14:32 -0500 Subject: [PATCH 117/137] Fixed #654. Make sure the gotoblas_init function is run before all other static initializations. --- driver/others/memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index 49c57f911..f75a47d65 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -139,8 +139,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define BITMASK(a, b, c) ((((a) >> (b)) & (c))) -#define CONSTRUCTOR __attribute__ ((constructor)) -#define DESTRUCTOR __attribute__ ((destructor)) +#define CONSTRUCTOR __attribute__ ((constructor(101))) +#define DESTRUCTOR __attribute__ ((destructor(101))) #ifdef DYNAMIC_ARCH gotoblas_t *gotoblas = NULL; From 90aa8e24b94ce8bbf73e60f9c69c50a2b18565da Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Wed, 7 Oct 2015 02:31:51 +0800 Subject: [PATCH 118/137] Refs #615. Import bug fixes for LAPACKE dormlq. 
--- lapack-netlib/lapacke/src/lapacke_cunmlq_work.c | 16 +++++++++------- lapack-netlib/lapacke/src/lapacke_dormlq_work.c | 16 +++++++++------- lapack-netlib/lapacke/src/lapacke_sormlq_work.c | 16 +++++++++------- lapack-netlib/lapacke/src/lapacke_zunmlq_work.c | 16 +++++++++------- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/lapack-netlib/lapacke/src/lapacke_cunmlq_work.c b/lapack-netlib/lapacke/src/lapacke_cunmlq_work.c index 1cd20e1ca..5cf66424d 100644 --- a/lapack-netlib/lapacke/src/lapacke_cunmlq_work.c +++ b/lapack-netlib/lapacke/src/lapacke_cunmlq_work.c @@ -1,5 +1,5 @@ /***************************************************************************** - Copyright (c) 2011, Intel Corp. + Copyright (c) 2014, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #include "lapacke_utils.h" -lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, +lapack_int LAPACKE_cunmlq_work( int matrix_layout, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, @@ -41,20 +41,22 @@ lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, lapack_complex_float* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_order == LAPACK_COL_MAJOR ) { + lapack_int r; + if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_cunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_order == LAPACK_ROW_MAJOR ) { + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + r = LAPACKE_lsame( side, 'l' ) ? 
m : n; lapack_int lda_t = MAX(1,k); lapack_int ldc_t = MAX(1,m); lapack_complex_float* a_t = NULL; lapack_complex_float* c_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < r ) { info = -8; LAPACKE_xerbla( "LAPACKE_cunmlq_work", info ); return info; @@ -84,8 +86,8 @@ lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, goto exit_level_1; } /* Transpose input matrices */ - LAPACKE_cge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); - LAPACKE_cge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); + LAPACKE_cge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); + LAPACKE_cge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ LAPACK_cunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, work, &lwork, &info ); diff --git a/lapack-netlib/lapacke/src/lapacke_dormlq_work.c b/lapack-netlib/lapacke/src/lapacke_dormlq_work.c index 9a7a997fe..99a7c3c71 100644 --- a/lapack-netlib/lapacke/src/lapacke_dormlq_work.c +++ b/lapack-netlib/lapacke/src/lapacke_dormlq_work.c @@ -1,5 +1,5 @@ /***************************************************************************** - Copyright (c) 2011, Intel Corp. + Copyright (c) 2014, Intel Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -33,27 +33,29 @@ #include "lapacke_utils.h" -lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans, +lapack_int LAPACKE_dormlq_work( int matrix_layout, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ) { lapack_int info = 0; + lapack_int r; lapack_int lda_t, ldc_t; double *a_t = NULL, *c_t = NULL; - if( matrix_order == LAPACK_COL_MAJOR ) { + if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_dormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_order == LAPACK_ROW_MAJOR ) { + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + r = LAPACKE_lsame( side, 'l' ) ? m : n; lda_t = MAX(1,k); ldc_t = MAX(1,m); /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < r ) { info = -8; LAPACKE_xerbla( "LAPACKE_dormlq_work", info ); return info; @@ -81,8 +83,8 @@ lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans, goto exit_level_1; } /* Transpose input matrices */ - LAPACKE_dge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); - LAPACKE_dge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); + LAPACKE_dge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); + LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ LAPACK_dormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, work, &lwork, &info ); diff --git a/lapack-netlib/lapacke/src/lapacke_sormlq_work.c b/lapack-netlib/lapacke/src/lapacke_sormlq_work.c index 7a7464d18..bbf55bd84 100644 --- a/lapack-netlib/lapacke/src/lapacke_sormlq_work.c +++ b/lapack-netlib/lapacke/src/lapacke_sormlq_work.c @@ -1,5 +1,5 @@ /***************************************************************************** - Copyright (c) 2011, Intel 
Corp. + Copyright (c) 2014, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,27 +33,29 @@ #include "lapacke_utils.h" -lapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans, +lapack_int LAPACKE_sormlq_work( int matrix_layout, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ) { lapack_int info = 0; + lapack_int r; lapack_int lda_t, ldc_t; float *a_t = NULL, *c_t = NULL; - if( matrix_order == LAPACK_COL_MAJOR ) { + if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_sormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_order == LAPACK_ROW_MAJOR ) { + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + r = LAPACKE_lsame( side, 'l' ) ? m : n; lda_t = MAX(1,k); ldc_t = MAX(1,m); /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < r ) { info = -8; LAPACKE_xerbla( "LAPACKE_sormlq_work", info ); return info; @@ -81,8 +83,8 @@ lapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans, goto exit_level_1; } /* Transpose input matrices */ - LAPACKE_sge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); - LAPACKE_sge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); + LAPACKE_sge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); + LAPACKE_sge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ LAPACK_sormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, work, &lwork, &info ); diff --git a/lapack-netlib/lapacke/src/lapacke_zunmlq_work.c b/lapack-netlib/lapacke/src/lapacke_zunmlq_work.c index 8677ac0bc..38a2d947a 100644 --- a/lapack-netlib/lapacke/src/lapacke_zunmlq_work.c +++ b/lapack-netlib/lapacke/src/lapacke_zunmlq_work.c @@ -1,5 +1,5 @@ 
/***************************************************************************** - Copyright (c) 2011, Intel Corp. + Copyright (c) 2014, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #include "lapacke_utils.h" -lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, +lapack_int LAPACKE_zunmlq_work( int matrix_layout, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, @@ -41,20 +41,22 @@ lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, lapack_complex_double* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_order == LAPACK_COL_MAJOR ) { + lapack_int r; + if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_zunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_order == LAPACK_ROW_MAJOR ) { + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + r = LAPACKE_lsame( side, 'l' ) ? 
m : n; lapack_int lda_t = MAX(1,k); lapack_int ldc_t = MAX(1,m); lapack_complex_double* a_t = NULL; lapack_complex_double* c_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < r ) { info = -8; LAPACKE_xerbla( "LAPACKE_zunmlq_work", info ); return info; @@ -84,8 +86,8 @@ lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, goto exit_level_1; } /* Transpose input matrices */ - LAPACKE_zge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); - LAPACKE_zge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); + LAPACKE_zge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); + LAPACKE_zge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ LAPACK_zunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, work, &lwork, &info ); From 3684706a121f9d9e1ccfc4a2bbb98f698eb04514 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Thu, 8 Oct 2015 15:07:24 +0000 Subject: [PATCH 119/137] Include time.h. --- common.h | 1 + 1 file changed, 1 insertion(+) diff --git a/common.h b/common.h index c367e38cb..c7660a7dd 100644 --- a/common.h +++ b/common.h @@ -114,6 +114,7 @@ extern "C" { #include #endif #include +#include #include #include #ifdef SMP From aca7d7e953712703b1571b05d47b7c2cd515d6f9 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 20 Oct 2015 03:35:25 +0800 Subject: [PATCH 120/137] Detect cmake test result. --- test/CMakeLists.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 446fb8a44..cd4497117 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -16,7 +16,14 @@ endforeach() # $1 exec, $2 input, $3 output_result FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh "rm -f $3\n" -"$1 < $2" +"$1 < $2\n" +"grep -q FATAL $3\n" +"if [ $? 
-eq 0 ]; then\n" +"echo Error\n" +"exit 1\n" +"else\n" +"exit 0\n" +"fi\n" ) set(float_types s d c z) From d8392c1245f87661c66f6e05d8c8091927630a4d Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 20 Oct 2015 04:30:55 +0800 Subject: [PATCH 121/137] Fixe cmake config bugs. --- driver/level2/CMakeLists.txt | 21 ++++- driver/level3/CMakeLists.txt | 29 +++++-- kernel/CMakeLists.txt | 162 +++++++++++++++++++++++------------ 3 files changed, 146 insertions(+), 66 deletions(-) diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index 3f40aa47d..696767486 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -172,11 +172,26 @@ foreach (float_type ${FLOAT_TYPES}) endif () else () - # N.B. BLAS wants to put the U/L from the filename in the *MIDDLE* - GenerateCombinationObjects("${U_SOURCES};${L_SOURCES}" "TRANSA;UNIT" "N;N" "" 3 "" false ${float_type}) + # For real number functions + foreach (u_source ${U_SOURCES}) + string(REGEX MATCH "[a-z]+" op_name ${u_source}) + GenerateCombinationObjects("${u_source}" "UNIT" "N" "" 0 "${op_name}_NU" false ${float_type}) + GenerateCombinationObjects("${u_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TL" false ${float_type}) + endforeach () + + foreach (l_source ${L_SOURCES}) + string(REGEX MATCH "[a-z]+" op_name ${l_source}) + GenerateCombinationObjects("${l_source}" "UNIT" "N" "" 0 "${op_name}_NL" false ${float_type}) + GenerateCombinationObjects("${l_source}" "UNIT" "N" "TRANSA" 0 "${op_name}_TU" false ${float_type}) + endforeach () + if (SMP) GenerateNamedObjects("ger_thread.c" "" "" false "" "" false ${float_type}) - GenerateCombinationObjects("${NU_SMP_SOURCES}" "TRANSA;LOWER;UNIT" "N;U;N" "" 2 "" false ${float_type}) + foreach(nu_smp_source ${NU_SMP_SOURCES}) + string(REGEX MATCH "[a-z]+_[a-z]+" op_name ${nu_smp_source}) + GenerateCombinationObjects("${nu_smp_source}" "LOWER;UNIT" "U;N" "" 0 "${op_name}_N" false ${float_type}) + GenerateCombinationObjects("${nu_smp_source}" 
"LOWER;UNIT" "U;N" "TRANSA" 0 "${op_name}_T" false ${float_type}) + endforeach() endif () endif () endforeach () diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index 6d623b0c2..41d440f7a 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -13,11 +13,24 @@ foreach (GEMM_DEFINE ${GEMM_DEFINES}) endif () endforeach () -GenerateCombinationObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "N;L;N" "" 0) -GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "NN" 1) -GenerateCombinationObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "U;N" "" 1) -GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "U" "" 2) +set(TRMM_TRSM_SOURCES + trmm_L.c + trmm_R.c + trsm_L.c + trsm_R.c) + +foreach(trmm_trsm_source ${TRMM_TRSM_SOURCES}) + string(REGEX MATCH "[a-z]+_[A-Z]+" op_name ${trmm_trsm_source}) + GenerateCombinationObjects("${trmm_trsm_source}" "UPPER;UNIT" "L;N" "" 0 "${op_name}N") + GenerateCombinationObjects("${trmm_trsm_source}" "UPPER;UNIT" "L;N" "TRANSA" 0 "${op_name}T") +endforeach() + +GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "NN" 1) +GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "" 1) +GenerateCombinationObjects("syr2k_k.c" "LOWER;TRANS" "U;N" "" 1) +GenerateCombinationObjects("syrk_kernel.c" "LOWER" "U" "" 2) +GenerateCombinationObjects("syr2k_kernel.c" "LOWER" "U" "" 2) if (SMP) # N.B. these do NOT have a float type (e.g. DOUBLE) defined! 
@@ -39,13 +52,13 @@ foreach (float_type ${FLOAT_TYPES}) GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type}) # Need to set CONJ for trmm and trsm GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type}) - GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trmm_LC" false ${float_type}) + GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type}) GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_RR" false ${float_type}) - GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trmm_RC" false ${float_type}) + GenerateCombinationObjects("trmm_R.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_RC" false ${float_type}) GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_LR" false ${float_type}) - GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_LC" false ${float_type}) + GenerateCombinationObjects("trsm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trsm_LC" false ${float_type}) GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trsm_RR" false ${float_type}) - GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANS;CONJ" 0 "trsm_RC" false ${float_type}) + GenerateCombinationObjects("trsm_R.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trsm_RC" false ${float_type}) #hemm GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN" 0 "hemm_L" false ${float_type}) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index d2cc77b11..bd32544f4 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -155,14 +155,14 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false ${float_type}) - 
GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;NN" "trmm_kernel_LN" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA;NN" "trmm_kernel_LT" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;CONJ;CN" "trmm_kernel_LR" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA;CONJ;CN" "trmm_kernel_LC" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "NN" "trmm_kernel_RN" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRANSA;NN" "trmm_kernel_RT" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "CONJ;NC" "trmm_kernel_RR" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRANSA;CONJ;NC" "trmm_kernel_RC" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;NN" "trmm_kernel_LN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;NN" "trmm_kernel_LT" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;CONJ;CN" "trmm_kernel_LR" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;LEFT;TRANSA;CONJ;CN" "trmm_kernel_LC" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;NN" "trmm_kernel_RN" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;NN" "trmm_kernel_RT" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;CONJ;NC" "trmm_kernel_RR" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;CONJ;NC" "trmm_kernel_RC" false "" "" false ${float_type}) 
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "UPPER;LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false ${float_type}) @@ -176,8 +176,106 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type}) GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type}) - else () + # symm for c and z + GenerateNamedObjects("generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_M}.c" "" "symm_iutcopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "symm_iltcopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy" false "" "" false ${float_type}) + 
GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iutncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type}) + + 
GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type}) + + else () #For real GenerateCombinationObjects("${KERNELDIR}/${TRMM_KERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel" false ${float_type}) + + # symm for s and d + GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type}) + 
GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_M}.c" "" "symm_iutcopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "symm_iltcopy" false "" "" false ${float_type}) + + # These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define. + # Could simplify it a bit by pairing up by -UUNIT/-DUNIT. + + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iutncopy" false "" "" false 
${float_type}) + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type}) + + 
GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type}) + + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type}) + GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type}) + endif () GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false ${float_type}) @@ -185,53 +283,7 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false ${float_type}) - # These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define. - # Could simplify it a bit by pairing up by -UUNIT/-DUNIT. 
- GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iunncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iutncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" 
"OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type}) - 
GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/symm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "" "symm_iutcopy" false "" "" false ${float_type}) - - GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type}) - GenerateNamedObjects("generic/symm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER" "symm_iltcopy" false "" "" false ${float_type}) if (NOT DEFINED ${float_char}OMATCOPY_CN) if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") From 96f0bbe067706dae68301f7d049625f2cee689e3 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Wed, 21 Oct 2015 02:24:54 +0800 Subject: [PATCH 122/137] Fixed cmake bug on haswell. 
--- kernel/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index bd32544f4..2d7b18973 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -114,7 +114,7 @@ endforeach () # Makefile.L3 set(USE_TRMM false) -if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic") +if (${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64" OR "${TARGET}" STREQUAL "LONGSOON3B" OR "${TARGET}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell") set(USE_TRMM true) endif () From 8fade093aaa3748e5e879fcf0fed1833915d7aab Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 20 Oct 2015 14:37:22 -0500 Subject: [PATCH 123/137] Fixed cmake bug on Visual Studio. --- common.h | 3 +- interface/CMakeLists.txt | 55 +++++++++++++++++++------------- interface/zdot.c | 7 +++-- interface/zgemv.c | 3 ++ interface/zsyr.c | 3 ++ kernel/CMakeLists.txt | 68 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 113 insertions(+), 26 deletions(-) diff --git a/common.h b/common.h index 0b0bdb812..d8eadb421 100644 --- a/common.h +++ b/common.h @@ -502,8 +502,7 @@ static void __inline blas_lock(volatile BLASULONG *address){ extension since version 3.0. If neither are available, use a compatible structure as fallback (see Clause 6.2.5.13 of the C99 standard). 
*/ #if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ - (__GNUC__ >= 3 && !defined(__cplusplus)) || \ - _MSC_VER >= 1800) // Visual Studio 2013 supports complex + (__GNUC__ >= 3 && !defined(__cplusplus)) ) #define OPENBLAS_COMPLEX_C99 #ifndef __cplusplus #include <complex.h> diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 91565d2f2..9ff924e5f 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -69,8 +69,8 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS}) set(DISABLE_COMPLEX 0) set(MANGLE_COMPLEX 3) if (CBLAS_FLAG EQUAL 1) - set(DISABLE_COMPLEX 1) - set(MANGLE_COMPLEX 1) +# set(DISABLE_COMPLEX 1) +# set(MANGLE_COMPLEX 1) endif () GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX}) GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1) @@ -98,43 +98,54 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS}) GenerateNamedObjects("imax.c" "USE_ABS;USE_MIN" "i*amin" ${CBLAS_FLAG}) GenerateNamedObjects("imax.c" "USE_MIN" "i*min" ${CBLAS_FLAG}) -endforeach () # complex-specific sources foreach (float_type ${FLOAT_TYPES}) if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") - GenerateNamedObjects("zger.c" "" "geru" false "" "" false ${float_type}) - GenerateNamedObjects("zger.c" "CONJ" "gerc" false "" "" false ${float_type}) - GenerateNamedObjects("zdot.c" "CONJ" "dotc" false "" "" false ${float_type}) - GenerateNamedObjects("zdot.c" "" "dotu" false "" "" false ${float_type}) + GenerateNamedObjects("zger.c" "" "geru" ${CBLAS_FLAG} "" "" false ${float_type}) + GenerateNamedObjects("zger.c" "CONJ" "gerc" ${CBLAS_FLAG} "" "" false ${float_type}) + GenerateNamedObjects("zdot.c" "CONJ" "dotc" ${CBLAS_FLAG} "" "" false ${float_type}) + GenerateNamedObjects("zdot.c" "" "dotu" ${CBLAS_FLAG} "" "" false ${float_type}) - GenerateNamedObjects("symm.c" "HEMM" "hemm" false "" "" false ${float_type}) - GenerateNamedObjects("syrk.c" "HEMM" "herk" false "" "" false 
${float_type}) - GenerateNamedObjects("syr2k.c" "HEMM" "her2k" false "" "" false ${float_type}) + GenerateNamedObjects("symm.c" "HEMM" "hemm" ${CBLAS_FLAG} "" "" false ${float_type}) + GenerateNamedObjects("syrk.c" "HEMM" "herk" ${CBLAS_FLAG} "" "" false ${float_type}) + GenerateNamedObjects("syr2k.c" "HEMM" "her2k" ${CBLAS_FLAG} "" "" false ${float_type}) if (USE_GEMM3M) GenerateNamedObjects("gemm.c" "GEMM3M" "gemm3m" false "" "" false ${float_type}) endif() endif () if (${float_type} STREQUAL "COMPLEX") - GenerateNamedObjects("zscal.c" "SSCAL" "sscal" false "" "" false "COMPLEX") - GenerateNamedObjects("nrm2.c" "" "scnrm2" false "" "" true "COMPLEX") - GenerateNamedObjects("zrot.c" "" "csrot" false "" "" true "COMPLEX") - GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" false "" "" true "COMPLEX") - GenerateNamedObjects("max.c" "USE_ABS" "scamax" false "" "" true "COMPLEX") - GenerateNamedObjects("asum.c" "" "scasum" false "" "" true "COMPLEX") + GenerateNamedObjects("zscal.c" "SSCAL" "sscal" ${CBLAS_FLAG} "" "" false "COMPLEX") + GenerateNamedObjects("nrm2.c" "" "scnrm2" ${CBLAS_FLAG} "" "" true "COMPLEX") + GenerateNamedObjects("zrot.c" "" "csrot" ${CBLAS_FLAG} "" "" true "COMPLEX") + GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" ${CBLAS_FLAG} "" "" true "COMPLEX") + GenerateNamedObjects("max.c" "USE_ABS" "scamax" ${CBLAS_FLAG} "" "" true "COMPLEX") + GenerateNamedObjects("asum.c" "" "scasum" ${CBLAS_FLAG} "" "" true "COMPLEX") endif () if (${float_type} STREQUAL "ZCOMPLEX") - GenerateNamedObjects("zscal.c" "SSCAL" "dscal" false "" "" false "ZCOMPLEX") - GenerateNamedObjects("nrm2.c" "" "dznrm2" false "" "" true "ZCOMPLEX") - GenerateNamedObjects("zrot.c" "" "zdrot" false "" "" true "ZCOMPLEX") - GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" false "" "" true "ZCOMPLEX") - GenerateNamedObjects("max.c" "USE_ABS" "dzamax" false "" "" true "ZCOMPLEX") - GenerateNamedObjects("asum.c" "" "dzasum" false "" "" true "ZCOMPLEX") + 
GenerateNamedObjects("zscal.c" "SSCAL" "dscal" ${CBLAS_FLAG} "" "" false "ZCOMPLEX") + GenerateNamedObjects("nrm2.c" "" "dznrm2" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") + GenerateNamedObjects("zrot.c" "" "zdrot" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") + GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") + GenerateNamedObjects("max.c" "USE_ABS" "dzamax" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") + GenerateNamedObjects("asum.c" "" "dzasum" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") endif () endforeach () +endforeach () + +#Special functions for CBLAS +if (NOT DEFINED NO_CBLAS) + foreach (float_type ${FLOAT_TYPES}) + if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") + #cblas_dotc_sub cblas_dotu_sub + GenerateNamedObjects("zdot.c" "FORCE_USE_STACK" "dotu_sub" 1 "" "" false ${float_type}) + GenerateNamedObjects("zdot.c" "FORCE_USE_STACK;CONJ" "dotc_sub" 1 "" "" false ${float_type}) + endif() + endforeach () +endif() if (NOT DEFINED NO_LAPACK) set(LAPACK_SOURCES diff --git a/interface/zdot.c b/interface/zdot.c index 34dfb731a..d4d0fab92 100644 --- a/interface/zdot.c +++ b/interface/zdot.c @@ -153,16 +153,19 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy, OPENBLAS_COMPLEX_FLOAT CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ OPENBLAS_COMPLEX_FLOAT ret; + OPENBLAS_COMPLEX_FLOAT zero=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); #endif PRINT_DEBUG_CNAME; if (n <= 0) { #ifdef FORCE_USE_STACK - *result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); + //*result = OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); + CREAL(*result) = 0.0; + CIMAG(*result) = 0.0; return; #else - return OPENBLAS_MAKE_COMPLEX_FLOAT(0.0, 0.0); + return zero; #endif } diff --git a/interface/zgemv.c b/interface/zgemv.c index 792f799e5..520136b45 100644 --- a/interface/zgemv.c +++ b/interface/zgemv.c @@ -148,6 +148,9 @@ void CNAME(enum CBLAS_ORDER order, blasint info, t; #ifdef SMP int nthreads; + int nthreads_max; + int nthreads_avail; 
+ double MNK; #endif int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, diff --git a/interface/zsyr.c b/interface/zsyr.c index 5fe29cefa..09b1de578 100644 --- a/interface/zsyr.c +++ b/interface/zsyr.c @@ -121,6 +121,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO FLOAT *buffer; int trans, uplo; blasint info; + FLOAT * ALPHA = &alpha; + FLOAT alpha_r = ALPHA[0]; + FLOAT alpha_i = ALPHA[1]; #ifdef SMP int nthreads; #endif diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 2d7b18973..2156e3993 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -347,6 +347,74 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${${float_char}OMATCOPY_RTC}" "CONJ;ROWM" "omatcopy_k_rtc" false "" "" false ${float_type}) endif() + #imatcopy + if (NOT DEFINED ${float_char}IMATCOPY_CN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_CN ../generic/zimatcopy_cn.c) + else () + set(${float_char}IMATCOPY_CN ../generic/imatcopy_cn.c) + endif () + endif () + + if (NOT DEFINED ${float_char}IMATCOPY_RN) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_RN ../generic/zimatcopy_rn.c) + else () + set(${float_char}IMATCOPY_RN ../generic/imatcopy_rn.c) + endif () + endif () + + if (NOT DEFINED ${float_char}IMATCOPY_CT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_CT ../generic/zimatcopy_ct.c) + else () + set(${float_char}IMATCOPY_CT ../generic/imatcopy_ct.c) + endif () + endif () + + if (NOT DEFINED ${float_char}IMATCOPY_RT) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_RT ../generic/zimatcopy_rt.c) + else () + set(${float_char}IMATCOPY_RT ../generic/imatcopy_rt.c) + endif () + endif () + + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CN}" "" "imatcopy_k_cn" false "" "" false ${float_type}) + 
GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RN}" "ROWM" "imatcopy_k_rn" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CT}" "" "imatcopy_k_ct" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RT}" "ROWM" "imatcopy_k_rt" false "" "" false ${float_type}) + + + if (NOT DEFINED ${float_char}IMATCOPY_CNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_CNC ../generic/zimatcopy_cnc.c) + endif () + endif () + if (NOT DEFINED ${float_char}IMATCOPY_RNC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_RNC ../generic/zimatcopy_rnc.c) + endif () + endif () + if (NOT DEFINED ${float_char}IMATCOPY_CTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_CTC ../generic/zimatcopy_ctc.c) + endif () + endif () + if (NOT DEFINED ${float_char}IMATCOPY_RTC) + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + set(${float_char}IMATCOPY_RTC ../generic/zimatcopy_rtc.c) + endif () + endif () + + if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C") + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CNC}" "CONJ" "imatcopy_k_cnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RNC}" "CONJ;ROWM" "imatcopy_k_rnc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_CTC}" "CONJ" "imatcopy_k_ctc" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}IMATCOPY_RTC}" "CONJ;ROWM" "imatcopy_k_rtc" false "" "" false ${float_type}) + endif() + + #geadd GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) endforeach () From 1ce054fcb3757e952423a09025c2f2a26023f8a5 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Thu, 22 Oct 2015 11:07:35 -0500 Subject: [PATCH 124/137] 
Refs #669. Fixed the build bug with gcc on Mac OS X. --- driver/others/memory.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/driver/others/memory.c b/driver/others/memory.c index f75a47d65..fca516145 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -139,8 +139,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define BITMASK(a, b, c) ((((a) >> (b)) & (c))) +#if defined(OS_DARWIN) && defined(C_GCC) +#define CONSTRUCTOR __attribute__ ((constructor)) +#define DESTRUCTOR __attribute__ ((destructor)) +#else #define CONSTRUCTOR __attribute__ ((constructor(101))) #define DESTRUCTOR __attribute__ ((destructor(101))) +#endif #ifdef DYNAMIC_ARCH gotoblas_t *gotoblas = NULL; From 5a291606adaf425f34dc7223a7775b93518c08cf Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Sat, 24 Oct 2015 01:16:34 +0800 Subject: [PATCH 125/137] Refs #671. the return of i?max cannot larger than N. --- interface/imax.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/interface/imax.c b/interface/imax.c index 55ffa7c6e..4378f1e22 100644 --- a/interface/imax.c +++ b/interface/imax.c @@ -136,6 +136,8 @@ blasint NAME(blasint *N, FLOAT *x, blasint *INCX){ ret = (blasint)MAX_K(n, x, incx); + if(ret > n) ret=n; + FUNCTION_PROFILE_END(COMPSIZE, n, 0); IDEBUG_END; @@ -159,6 +161,8 @@ CBLAS_INDEX CNAME(blasint n, FLOAT *x, blasint incx){ ret = MAX_K(n, x, incx); + if (ret > n) ret=n; + if (ret) ret --; FUNCTION_PROFILE_END(COMPSIZE, n, 0); From b809f99ceeeed355dddf61751278a7ab5a74a5f4 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 26 Oct 2015 23:42:21 +0800 Subject: [PATCH 126/137] Add CBLAS test for CMAKE. 
--- CMakeLists.txt | 4 ++++ ctest/CMakeLists.txt | 44 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e10df13a3..3b436dc13 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,11 @@ set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${Open enable_language(ASM) enable_language(C) +if(MSVC) +set(OpenBLAS_LIBNAME libopenblas) +else() set(OpenBLAS_LIBNAME openblas) +endif() ####### if(MSVC) diff --git a/ctest/CMakeLists.txt b/ctest/CMakeLists.txt index d3e15870b..dbe785bcb 100644 --- a/ctest/CMakeLists.txt +++ b/ctest/CMakeLists.txt @@ -1,6 +1,46 @@ include_directories(${CMAKE_SOURCE_DIR}) +enable_language(Fortran) -#foreach(test_bin ${OpenBLAS_Tests}) +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DADD${BU} -DCBLAS") -#endforeach() \ No newline at end of file +FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh +"$1 < $2\n" +) + +foreach(float_type ${FLOAT_TYPES}) + string(SUBSTRING ${float_type} 0 1 float_char_upper) + string(TOLOWER ${float_char_upper} float_char) + #level1 + add_executable(x${float_char}cblat1 + c_${float_char}blat1.f + c_${float_char}blas1.c) + target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}_static) + add_test(NAME "x${float_char}cblat1" + COMMAND "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat1") + + #level2 + add_executable(x${float_char}cblat2 + c_${float_char}blat2.f + c_${float_char}blas2.c + c_${float_char}2chke.c + auxiliary.c + c_xerbla.c + constant.c) + target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}_static) + add_test(NAME "x${float_char}cblat2" + COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat2" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in2") + + #level3 + add_executable(x${float_char}cblat3 + c_${float_char}blat3.f + c_${float_char}blas3.c + c_${float_char}3chke.c + auxiliary.c + c_xerbla.c + constant.c) + 
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}_static) + add_test(NAME "x${float_char}cblat3" + COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat3" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3") + +endforeach() From 309875de3cb752b1a83e6086f54e305ce5d63327 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 27 Oct 2015 02:54:53 +0800 Subject: [PATCH 127/137] Fix cmake bug on x86 32-bit. e.g. Build 32-bit on 64-bit Linux. cmake -DBINARY=32 --- cmake/system.cmake | 6 ++++++ kernel/CMakeLists.txt | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/cmake/system.cmake b/cmake/system.cmake index 8ec738a10..71bf5c2cc 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -323,7 +323,13 @@ if (NOT DEFINED COMMON_OPT) set(COMMON_OPT "-O2") endif () +#For x86 32-bit +if (DEFINED BINARY AND BINARY EQUAL 32) + set(COMMON_OPT "${COMMON_OPT} -m32") +endif() + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}") +set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}") # TODO: not sure what PFLAGS is -hpa set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}") diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 2156e3993..43837a0f3 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -165,9 +165,9 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("${KERNELDIR}/${TRMM_KERNEL}" "TRMMKERNEL;TRANSA;CONJ;NC" "trmm_kernel_RC" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "UPPER;LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false ${float_type}) 
GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false ${float_type}) - GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type}) + GenerateNamedObjects("${KERNELDIR}/${${float_char}TRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false ${float_type}) #hemm From 53b6023a6cd458eecf22d03361881fda57d85f06 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 26 Oct 2015 14:52:13 -0500 Subject: [PATCH 128/137] Fix cmake bug on MSVC 32-bit. --- cmake/system.cmake | 4 ++++ kernel/CMakeLists.txt | 4 ++++ kernel/x86/cpuid_win.c | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 kernel/x86/cpuid_win.c diff --git a/cmake/system.cmake b/cmake/system.cmake index 71bf5c2cc..134e9c12d 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -325,11 +325,15 @@ endif () #For x86 32-bit if (DEFINED BINARY AND BINARY EQUAL 32) +if (NOT MSVC) set(COMMON_OPT "${COMMON_OPT} -m32") endif() +endif() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}") +if(NOT MSVC) set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}") +endif() # TODO: not sure what PFLAGS is -hpa set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}") diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 43837a0f3..8a3b021cc 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -22,7 +22,11 @@ ParseMakefileVars("${KERNELDIR}/KERNEL") ParseMakefileVars("${KERNELDIR}/KERNEL.${TARGET_CORE}") if (${ARCH} STREQUAL "x86") +if (NOT MSVC) GenerateNamedObjects("${KERNELDIR}/cpuid.S" "" "" false "" "" true) +else() + GenerateNamedObjects("${KERNELDIR}/cpuid_win.c" "" "" false "" "" true) +endif() endif () # don't use float type name mangling here diff --git a/kernel/x86/cpuid_win.c b/kernel/x86/cpuid_win.c new file mode 
100644 index 000000000..a1b00016b --- /dev/null +++ b/kernel/x86/cpuid_win.c @@ -0,0 +1,41 @@ +/*************************************************************************** +Copyright (c) 2015, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#if defined(_MSC_VER) && !defined(__clang__) + +#include + +void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) +{ + int cpuInfo[4] = {-1}; + __cpuid(cpuInfo, op); + *eax = cpuInfo[0]; + *ebx = cpuInfo[1]; + *ecx = cpuInfo[2]; + *edx = cpuInfo[3]; +} +#endif From e6d754fddcf44bf471f6abe9cdf2596db5fbe540 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 26 Oct 2015 15:08:17 -0500 Subject: [PATCH 129/137] Use AppVeyor Windows CI. --- appveyor.yml | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 appveyor.yml diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 000000000..9bb712a95 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,42 @@ +version: 0.2.14.{build} + +#environment: + +platform: + - x86 + - x64 +configuration: Release + +clone_folder: c:\projects\OpenBLAS + +init: + - git config --global core.autocrlf input + +build: + project: OpenBLAS.sln + +clone_depth: 5 + +#branches to build +branches: + only: + - master + - develop + - cmake + +skip_tags: true + +matrix: + fast_finish: true + +skip_commits: +# Add [av skip] to commit messages + message: /\[av skip\]/ + +before_build: + - echo Running cmake... + - cd c:\projects\OpenBLAS + - cmake . + +test_script: + From 69363622a896f1e8aef5454351b505509994fe7f Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 27 Oct 2015 05:10:40 +0800 Subject: [PATCH 130/137] Fix DYNAMIC_ARCH=1 bug. 
--- common_param.h | 50 ++++++++++++------------ kernel/setparam-ref.c | 7 ++++ kernel/x86_64/dtrmm_kernel_4x8_haswell.c | 4 +- 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/common_param.h b/common_param.h index ab40ddeef..36d6149ea 100644 --- a/common_param.h +++ b/common_param.h @@ -830,56 +830,56 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); - int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG); - int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG); - int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG); - int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG); - int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); - int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG); - int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG); - int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG); - int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG); - int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); - int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, 
float, float*, BLASLONG); - int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*cimatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*comatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); - int (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); - int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); - int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); - int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); - int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); - int (*zimatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); - int (*zomatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); + 
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG); + int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG); + int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG); + int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG); + + int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG); + int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG); + int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG); + int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG); + + int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + + int (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + + int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 0eeac2e1f..a4d1486fc 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -550,6 +550,13 @@ gotoblas_t TABLE_NAME = { zomatcopy_k_cnTS, 
zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS, zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS, + simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS, + dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS, + cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS, + cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS, + zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS, + zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS, + sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS }; diff --git a/kernel/x86_64/dtrmm_kernel_4x8_haswell.c b/kernel/x86_64/dtrmm_kernel_4x8_haswell.c index 504c784ac..ac8c97d03 100644 --- a/kernel/x86_64/dtrmm_kernel_4x8_haswell.c +++ b/kernel/x86_64/dtrmm_kernel_4x8_haswell.c @@ -7,7 +7,7 @@ static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOA static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOAT *C0, FLOAT *C1, FLOAT *C2,FLOAT *C3, FLOAT *C4, FLOAT *C5,FLOAT *C6, FLOAT *C7) { - BLASLONG I = 0; + BLASLONG i = 0; BLASLONG temp1 = n * 8; __asm__ __volatile__ @@ -110,7 +110,7 @@ static void dtrmm_kernel_4x8( BLASLONG n, FLOAT *alpha ,FLOAT *a, FLOAT *b, FLOA : : - "a" (I), // 0 + "a" (i), // 0 "r" (temp1), // 1 "S" (a), // 2 "D" (b), // 3 From 0b2ad98e48b36d4110e37112199c457b5023b511 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 27 Oct 2015 05:11:07 +0800 Subject: [PATCH 131/137] Only test x64 Windows CI. 
--- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 9bb712a95..3a0db79aa 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -3,8 +3,8 @@ version: 0.2.14.{build} #environment: platform: - - x86 - x64 + configuration: Release clone_folder: c:\projects\OpenBLAS From 1ac8c32f1d3a163f7cf2df45b7441e58dd569301 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 26 Oct 2015 18:08:54 -0500 Subject: [PATCH 132/137] [ci skip] Build Visual Studio 12 Win64 on Appveyor --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 3a0db79aa..74e1b00a9 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -36,7 +36,7 @@ skip_commits: before_build: - echo Running cmake... - cd c:\projects\OpenBLAS - - cmake . + - cmake -G "Visual Studio 12 Win64" . test_script: From 79d4a62e10f1f5aa9e38a6fc0a4a695023772c78 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 26 Oct 2015 18:14:41 -0500 Subject: [PATCH 133/137] Add AppVeyor badge. --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 16f874078..0ec86d362 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,9 @@ [![Join the chat at https://gitter.im/xianyi/OpenBLAS](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) -[![Build Status](https://travis-ci.org/xianyi/OpenBLAS.png?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS) +Travis CI: [![Build Status](https://travis-ci.org/xianyi/OpenBLAS.png?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS) +AppVeyor: [![Build status](https://ci.appveyor.com/api/projects/status/09sohd35n8nkkx64/branch/develop?svg=true)](https://ci.appveyor.com/project/xianyi/openblas/branch/develop) ## Introduction OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. 
From 70642fe4ed4ffd74d305cd5c76cd6425dba4bbd1 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 26 Oct 2015 19:02:51 -0500 Subject: [PATCH 134/137] Refs #668. Raise the signal when pthread_create fails. Thank James K. Lowden for the patch. --- appveyor.yml | 2 +- driver/others/blas_server.c | 29 ++++++++++++++++++++--------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 74e1b00a9..4daf1bd3d 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -39,4 +39,4 @@ before_build: - cmake -G "Visual Studio 12 Win64" . test_script: - + - echo Build OK! diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index 1fd848c6b..b570bcd5a 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -70,9 +70,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /*********************************************************************/ #include "common.h" -#ifdef OS_LINUX +#if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) #include +#include #include +#include #endif #ifndef likely @@ -265,7 +267,7 @@ int get_node(void); static int increased_threads = 0; -static int blas_thread_server(void *arg){ +static void* blas_thread_server(void *arg){ /* Thread identifier */ BLASLONG cpu = (BLASLONG)arg; @@ -458,7 +460,7 @@ static int blas_thread_server(void *arg){ //pthread_exit(NULL); - return 0; + return NULL; } #ifdef MONITOR @@ -565,14 +567,23 @@ int blas_thread_init(void){ #ifdef NEED_STACKATTR ret=pthread_create(&blas_threads[i], &attr, - (void *)&blas_thread_server, (void *)i); + &blas_thread_server, (void *)i); #else ret=pthread_create(&blas_threads[i], NULL, - (void *)&blas_thread_server, (void *)i); + &blas_thread_server, (void *)i); #endif if(ret!=0){ - fprintf(STDERR,"OpenBLAS: pthread_creat error in blas_thread_init function. 
Error code:%d\n",ret); - exit(1); + struct rlimit rlim; + const char *msg = strerror(ret); + fprintf(STDERR, "OpenBLAS blas_thread_init: pthread_create: %s\n", msg); + if(0 == getrlimit(RLIMIT_NPROC, &rlim)) { + fprintf(STDERR, "OpenBLAS blas_thread_init: RLIMIT_NPROC " + "%ld current, %ld max\n", (long)(rlim.rlim_cur), (long)(rlim.rlim_max)); + } + if(0 != raise(SIGINT)) { + fprintf(STDERR, "OpenBLAS blas_thread_init: calling exit(3)\n"); + exit(EXIT_FAILURE); + } } } @@ -832,10 +843,10 @@ void goto_set_num_threads(int num_threads) { #ifdef NEED_STACKATTR pthread_create(&blas_threads[i], &attr, - (void *)&blas_thread_server, (void *)i); + &blas_thread_server, (void *)i); #else pthread_create(&blas_threads[i], NULL, - (void *)&blas_thread_server, (void *)i); + &blas_thread_server, (void *)i); #endif } From 6040858b22f6c9a95cd22514f386f0f1f43c16f0 Mon Sep 17 00:00:00 2001 From: j-bo Date: Tue, 27 Oct 2015 13:55:24 +0100 Subject: [PATCH 135/137] Fix #673 Add lacking headers declarations when compiling for Android ARM7 --- driver/others/blas_server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index b570bcd5a..e1c644a80 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -70,7 +70,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*********************************************************************/ #include "common.h" -#if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) +#if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) #include #include #include From 63c56d3da93f80f704144750ba7bbf5887bbb5a4 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 27 Oct 2015 10:47:55 -0500 Subject: [PATCH 136/137] Only include complex.h since Android 5.0 --- common.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/common.h b/common.h index d8eadb421..7b81c6fb6 100644 --- a/common.h +++ b/common.h @@ -98,6 +98,10 @@ extern "C" { #ifdef OS_ANDROID #define NO_SYSV_IPC +//Android NDK only supports complex.h since Android 5.0 +#if __ANDROID_API__ < 21 +#define FORCE_OPENBLAS_COMPLEX_STRUCT +#endif #endif #ifdef OS_WINDOWS @@ -501,12 +505,12 @@ static void __inline blas_lock(volatile BLASULONG *address){ /* C99 supports complex floating numbers natively, which GCC also offers as an extension since version 3.0. If neither are available, use a compatible structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ -#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ - (__GNUC__ >= 3 && !defined(__cplusplus)) ) +#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ + (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) #define OPENBLAS_COMPLEX_C99 -#ifndef __cplusplus - #include -#endif + #ifndef __cplusplus + #include + #endif typedef float _Complex openblas_complex_float; typedef double _Complex openblas_complex_double; typedef xdouble _Complex openblas_complex_xdouble; From 8447498b504a3a903507715b3144e8b2a05ec0a0 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 27 Oct 2015 15:44:35 -0500 Subject: [PATCH 137/137] Update doc for OpenBLAS 0.2.15 version. 
[CI skipped] --- Changelog.txt | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++ Makefile.rule | 2 +- appveyor.yml | 2 +- 3 files changed, 55 insertions(+), 2 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index 6941a9f96..422b8b519 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,57 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.2.15 +27-Oct-2015 +common: + * Support cmake on x86/x86-64. Natively compiling on MS Visual Studio. + (experimental. Thanks to Hank Anderson for the initial cmake porting work.) + + On Linux and Mac OSX, OpenBLAS cmake supports assembly kernels. + e.g. cmake . + make + make test (Optional) + + On Windows MS Visual Studio, OpenBLAS cmake only supports C kernels. + (OpenBLAS uses AT&T style assembly, which is not supported by MSVC.) + e.g. cmake -G "Visual Studio 12 Win64" . + Open OpenBLAS.sln and build. + + * Enable MAX_STACK_ALLOC flags by default. + Improve ger and gemv for small matrices. + * Improve gemv parallel with small m and large n case. + * Improve ?imatcopy when lda==ldb (#633. Thanks, Martin Koehler) + * Add vecLib benchmarks (#565. Thanks, Andreas Noack.) + * Fix LAPACK lantr for row major matrices (#634. Thanks, Dan Kortschak) + * Fix LAPACKE lansy (#640. Thanks, Dan Kortschak) + * Import bug fixes for LAPACKE s/dormlq, c/zunmlq + * Raise the signal when pthread_create fails (#668. Thanks, James K. Lowden) + * Remove g77 from compiler list. + * Enable AppVeyor Windows CI. + +x86/x86-64: + * Support pure C generic kernels for x86/x86-64. + * Support Intel Broadwell and Skylake by Haswell kernels. + * Support AMD Excavator by Steamroller kernels. + * Optimize s/d/c/zdot for Intel SandyBridge and Haswell. + * Optimize s/d/c/zdot for AMD Piledriver and Steamroller. + * Optimize s/d/c/zaxpy for Intel SandyBridge and Haswell. + * Optimize s/d/c/zaxpy for AMD Piledriver and Steamroller. + * Optimize d/c/zscal for Intel Haswell, dscal for Intel SandyBridge.
+ * Optimize d/c/zscal for AMD Bulldozer, Piledriver and Steamroller. + * Optimize s/dger for Intel SandyBridge. + * Optimize s/dsymv for Intel SandyBridge. + * Optimize ssymv for Intel Haswell. + * Optimize dgemv for Intel Nehalem and Haswell. + * Optimize dtrmm for Intel Haswell. + +ARM: + * Support Android NDK armeabi-v7a-hard ABI (-mfloat-abi=hard) + e.g. make HOSTCC=gcc CC=arm-linux-androideabi-gcc NO_LAPACK=1 TARGET=ARMV7 + * Fix lock, rpcc bugs (#616, #617. Thanks, Grazvydas Ignotas) +POWER: + * Support ppc64le platform (ELF ABI v2. #612. Thanks, Matthew Brandyberry.) + * Support POWER7/8 by POWER6 kernels. (#612. Thanks, Fábio Perez.) + ==================================================================== Version 0.2.14 24-Mar-2015 diff --git a/Makefile.rule b/Makefile.rule index 22f222e3f..459f79c26 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.2.14 +VERSION = 0.2.15 # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library diff --git a/appveyor.yml b/appveyor.yml index 4daf1bd3d..394e48854 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,4 +1,4 @@ -version: 0.2.14.{build} +version: 0.2.15.{build} #environment: