From 13d2d48e67d5d76714690e4fb6ab46e5a678431a Mon Sep 17 00:00:00 2001
From: Hank Anderson
Date: Fri, 6 Feb 2015 13:42:20 -0600
Subject: [PATCH] Added yet another naming scheme for lapack functions.

GenerateCombinationObjects gains replace_scheme 4, which inserts the
define code before the last underscore of the source file name, adding
an underscore first when the name contains only one.

lapack/CMakeLists.txt moves the getrs sources into TRANS_SOURCES and the
trtri sources into UNIT_SOURCES, and generates the objects for both
lists with the new scheme.

---
Notes (ignored by git am, not part of the commit message):

 * The leading indentation of the diff lines below is reconstructed. If
   a context line does not match the target tree, apply the patch with
   `git apply --ignore-whitespace`.
 * Compared with the originally circulated version, the scheme 4 branch
   no longer prints every generated name through message(STATUS ...),
   tests for a second underscore with a single if(... MATCHES ...),
   quotes its variable expansions, and leaves the existing `else ()`
   line untouched. Hunk counts and the diffstat reflect this; the
   post-image blob id on the cmake/utils.cmake `index` line was not
   recomputed.

 cmake/utils.cmake     | 10 ++++++++++
 lapack/CMakeLists.txt | 24 +++++++++++++++++++++---
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/cmake/utils.cmake b/cmake/utils.cmake
index 286f271e2..641e7a7c6 100644
--- a/cmake/utils.cmake
+++ b/cmake/utils.cmake
@@ -136,6 +136,7 @@ endfunction ()
 # If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU.
 # If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU.
 # If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects).
+# If 4, it will insert the code before the last underscore, e.g. trtri_U_parallel with TRANS will be trtri_UT_parallel. If the name has only one underscore another one is inserted first, e.g. getrs_parallel with TRANS will be getrs_T_parallel.
 # @param alternate_name replaces the source name as the object name (define codes are still appended)
 function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_type_in all_defines_in replace_scheme)
 
@@ -184,6 +185,15 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_
           string(SUBSTRING ${define_code} 0 1 define_code_first)
           string(SUBSTRING ${define_code} 1 -1 define_code_second)
           set(replace_code "${define_code_first}${last_letter}${define_code_second}")
+        elseif (replace_scheme EQUAL 4)
+          # insert code before the last underscore and pass that in as the alternate_name
+          get_filename_component(alternate_name "${source_file}" NAME_WE)
+          set(extra_underscore "")
+          # a name without a second underscore needs one inserted (e.g. getrs_parallel with TRANS needs to become getrs_T_parallel, not getrsT_parallel)
+          if (NOT alternate_name MATCHES "_[a-zA-Z]+_")
+            set(extra_underscore "_")
+          endif ()
+          string(REGEX REPLACE "(.+)(_[^_]+)$" "\\1${extra_underscore}${define_code}\\2" alternate_name "${alternate_name}")
         else ()
           set(append_code ${define_code}) # replace_scheme should be 0
         endif ()
diff --git a/lapack/CMakeLists.txt b/lapack/CMakeLists.txt
index c6d051c76..664ce6d6e 100644
--- a/lapack/CMakeLists.txt
+++ b/lapack/CMakeLists.txt
@@ -4,12 +4,10 @@ include_directories(${CMAKE_SOURCE_DIR})
 # TODO: laswp needs arch specific code
 # TODO: getrs needs to be compiled with and without TRANS (and up to TRANS=4 in the complex case)
 # TODO: trti2 needs to be compiled with and without UNIT
-# TODO: trtri needs to be compiled with and without UNIT
 
 set(LAPACK_SOURCES
   getf2/getf2_k.c
   getrf/getrf_single.c
-  getrs/getrs_single.c
   potrf/potrf_U_single.c
   potrf/potrf_L_single.c
   potf2/potf2_U.c
@@ -20,6 +18,15 @@ set(LAPACK_SOURCES
   lauum/lauum_L_single.c
   trti2/trti2_U.c
   trti2/trti2_L.c
+)
+
+# sources that need TRANS set
+set(TRANS_SOURCES
+  getrs/getrs_single.c
+)
+
+# sources that need UNIT set
+set(UNIT_SOURCES
   trtri/trtri_U_single.c
   trtri/trtri_L_single.c
 )
@@ -55,11 +62,17 @@ if (SMP)
 
   set(PARALLEL_SOURCES
     ${GETRF_SRC}
-    getrs/getrs_parallel.c
     potrf/potrf_U_parallel.c
     potrf/potrf_L_parallel.c
     lauum/lauum_U_parallel.c
     lauum/lauum_L_parallel.c
+  )
+
+  list(APPEND TRANS_SOURCES
+    getrs/getrs_parallel.c
+  )
+
+  list(APPEND UNIT_SOURCES
     trtri/trtri_U_parallel.c
     trtri/trtri_L_parallel.c
   )
@@ -79,5 +92,10 @@ if (SMP)
   list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT})
 endif ()
 
+GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "DOUBLE" "" 4)
+list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT})
+GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "DOUBLE" "" 4)
+list(APPEND DBLAS_OBJS ${COMBO_OBJ_LIST_OUT})
+
 set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS
 