Thread a complex_filename_scheme argument through GenerateCombinationObjects.

cmake/utils.cmake
  - Documents the 8th GenerateNamedObjects parameter as complex_filename_scheme.
  - Defaults use_cblas to false instead of 0.
  - Treats an empty 8th argument the same as an omitted one.
  - GenerateCombinationObjects accepts an optional 8th argument and forwards it
    to GenerateNamedObjects.
interface/CMakeLists.txt
  - When CBLAS_FLAG is 1, passes scheme 1 in place of scheme 0 / scheme 3.
lapack/CMakeLists.txt
  - Moves getf2/lauu2/potf2 into LAPACK_MANGLED_SOURCES, built with scheme 3.
  - Removes ZPARALLEL_SOURCES.
  - Passes scheme 3 for TRANS_SOURCES and UNIT_SOURCES2.

Review changes relative to the patch as submitted (added lines only):
  - Optional-argument checks use ARGC instead of DEFINED ARGV7; CMake leaves
    ARGV# beyond ARGC undefined, so a caller's ARGV7 could otherwise be seen.
  - complex_filename_scheme is initialised to "" in GenerateCombinationObjects
    so a same-named variable in the calling scope is never forwarded.
    The affected hunk headers were adjusted for the extra line.

NOTE(review): this patch was recovered from a whitespace-collapsed copy. The
indentation of context lines and the position of blank context lines were
reconstructed from the hunk line counts; if a hunk fails to match, apply with
whitespace-tolerant options (git apply --ignore-whitespace, patch -l) and
confirm against the target tree.

diff --git a/cmake/utils.cmake b/cmake/utils.cmake
index aaa669abd..d9c180fb6 100644
--- a/cmake/utils.cmake
+++ b/cmake/utils.cmake
@@ -70,7 +70,7 @@ endfunction ()
 # @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU)
 # @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters)
 # @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
-# @param complex_only/real_only some routines have separate source files for complex and non-complex float types.
+# @param complex_filename_scheme some routines have separate source files for complex and non-complex float types.
 #                               0 - compiles for all types
 #                               1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
 #                               2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
@@ -88,7 +88,7 @@ function(GenerateNamedObjects sources_in)
   if (DEFINED ARGV3)
     set(use_cblas ${ARGV3})
   else ()
-    set(use_cblas 0)
+    set(use_cblas false)
   endif ()
 
   if (DEFINED ARGV4)
@@ -108,7 +108,7 @@ function(GenerateNamedObjects sources_in)
   set(real_only false)
   set(complex_only false)
   set(mangle_complex_sources false)
-  if (DEFINED ARGV7)
+  if (ARGC GREATER 7 AND NOT "${ARGV7}" STREQUAL "")
     if (${ARGV7} EQUAL 1)
       set(real_only true)
     elseif (${ARGV7} EQUAL 2)
@@ -204,6 +204,7 @@ endfunction ()
 #                       If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel
 # @param alternate_name replaces the source name as the object name (define codes are still appended)
 # @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
+# @param complex_filename_scheme see GenerateNamedObjects
 function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme)
 
   if (DEFINED ARGV5)
@@ -214,6 +215,11 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_de
     set(no_float_type ${ARGV6})
   endif ()
 
+  set(complex_filename_scheme "")
+  if (ARGC GREATER 7)
+    set(complex_filename_scheme "${ARGV7}")
+  endif ()
+
   AllCombinations("${defines_in}" "${absent_codes_in}")
   set(define_combos ${LIST_OUT})
   set(define_codes ${CODES_OUT})
@@ -271,7 +277,7 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_de
         endif ()
       endif ()
 
-      GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" 0 "${replace_code}" "${append_code}" "${no_float_type}")
+      GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" 0 "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}")
       list(APPEND COMBO_OBJ_LIST_OUT "${OBJ_LIST_OUT}")
     endforeach ()
   endforeach ()
diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt
index 030a14fd2..633b8a6fe 100644
--- a/interface/CMakeLists.txt
+++ b/interface/CMakeLists.txt
@@ -31,6 +31,7 @@ set(BLAS2_SOURCES
   tpsv.c tpmv.c
 )
 
+# these do not have separate 'z' sources
 set(BLAS3_SOURCES
   gemm.c symm.c
   trsm.c syrk.c syr2k.c
@@ -53,12 +54,19 @@ endif ()
 
 foreach (CBLAS_FLAG ${CBLAS_FLAGS})
 
-  GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG})
-  GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 1)
-  GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 3)
-  GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 3)
-  GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG})
-  GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" 0 3)
+  # TODO: don't compile complex sources with cblas for now, the naming schemes are all different and they will have to be handled separately from SINGLE/DOUBLE
+  set(DISABLE_COMPLEX 0)
+  set(MANGLE_COMPLEX 3)
+  if (CBLAS_FLAG EQUAL 1)
+    set(DISABLE_COMPLEX 1)
+    set(MANGLE_COMPLEX 1)
+  endif ()
+  GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
+  GenerateNamedObjects("${BLAS1_REAL_ONLY_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false 1)
+  GenerateNamedObjects("${BLAS1_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
+  GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
+  GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${DISABLE_COMPLEX})
+  GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
 
   # trmm is trsm with a compiler flag set
   GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG})
diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt
index ed598f22d..26922f50e 100644
--- a/lapack/CMakeLists.txt
+++ b/lapack/CMakeLists.txt
@@ -3,29 +3,36 @@ include_directories(${CMAKE_SOURCE_DIR})
 
 
 set(LAPACK_SOURCES
-  getf2/getf2_k.c
   getrf/getrf_single.c
   potrf/potrf_U_single.c
   potrf/potrf_L_single.c
-  potf2/potf2_U.c
-  potf2/potf2_L.c
-  lauu2/lauu2_U.c
-  lauu2/lauu2_L.c
   lauum/lauum_U_single.c
   lauum/lauum_L_single.c
 )
 
+# add a 'z' to filename for complex version
+set(LAPACK_MANGLED_SOURCES
+  getf2/getf2_k.c
+  lauu2/lauu2_U.c
+  lauu2/lauu2_L.c
+  potf2/potf2_U.c
+  potf2/potf2_L.c
+)
+
 # sources that need TRANS set
+# this has a 'z' version
 set(TRANS_SOURCES
   getrs/getrs_single.c
 )
 
 # sources that need UNIT set
+# these do NOT have a z version
 set(UNIT_SOURCES
   trtri/trtri_U_single.c
   trtri/trtri_L_single.c
 )
 
+# these have a 'z' version
 set(UNIT_SOURCES2
   trti2/trti2_U.c
   trti2/trti2_L.c
@@ -51,6 +58,7 @@ set(ZLAPACK_SOURCES
 )
 
 GenerateNamedObjects("${LAPACK_SOURCES}")
+GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" "" 3)
 
 # TODO: laswp needs arch specific code
 GenerateNamedObjects("laswp/generic/laswp_k.c" "" "laswp_plus")
@@ -64,40 +72,32 @@ if (SMP)
     set(GETRF_SRC getrf/getrf_parallel.c)
   endif ()
 
+  # these do not have 'z' versions
   set(PARALLEL_SOURCES
     ${GETRF_SRC}
-    potrf/potrf_U_parallel.c
-    potrf/potrf_L_parallel.c
     lauum/lauum_U_parallel.c
     lauum/lauum_L_parallel.c
+    potrf/potrf_U_parallel.c
+    potrf/potrf_L_parallel.c
   )
 
+  # this has a z version
   list(APPEND TRANS_SOURCES
     getrs/getrs_parallel.c
   )
 
+  # these do NOT have a z version
   list(APPEND UNIT_SOURCES
     trtri/trtri_U_parallel.c
     trtri/trtri_L_parallel.c
   )
 
-  set(ZPARALLEL_SOURCES
-    ${GETRF_SRC}
-    getrs/zgetrs_parallel.c
-    potrf/potrf_U_parallel.c
-    potrf/potrf_L_parallel.c
-    lauum/lauum_U_parallel.c
-    lauum/lauum_L_parallel.c
-    trtri/trtri_U_parallel.c
-    trtri/trtri_L_parallel.c
-  )
-
   GenerateNamedObjects("${PARALLEL_SOURCES}")
 endif ()
 
-GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4)
+GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4 "" "" 3)
 GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "" 4)
-GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0)
+GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0 "" "" 3)
 
 set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS
 