Changed generate functions to iterate through a list of float types.

This will generate obj files for SINGLE/DOUBLE/COMPLEX/DOUBLE COMPLEX.
This commit is contained in:
Hank Anderson 2015-02-15 17:44:37 -06:00
parent e74462a3f5
commit 4662a0b13a
10 changed files with 222 additions and 174 deletions

View File

@ -12,7 +12,7 @@ set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${Open
enable_language(Fortran)
enable_language(ASM)
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only DOUBLE and x86 support is currently available.")
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")
include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake")
include("${CMAKE_SOURCE_DIR}/cmake/system.cmake")
@ -36,6 +36,36 @@ if (NOT NO_LAPACK)
list(APPEND SUBDIRS lapack)
endif ()
# Choose the precisions (float types) to build. Each enabled BUILD_* switch
# appends one entry to FLOAT_TYPES; the trailing comment on each append notes
# which preprocessor symbols that precision corresponds to.
if (NOT (DEFINED BUILD_SINGLE OR DEFINED BUILD_DOUBLE OR DEFINED BUILD_COMPLEX OR DEFINED BUILD_COMPLEX16))
  # no BUILD_* switch was given at all, so enable every precision
  set(BUILD_COMPLEX16 true)
  set(BUILD_COMPLEX true)
  set(BUILD_DOUBLE true)
  set(BUILD_SINGLE true)
endif ()

# start from an empty list; entries are appended in the fixed order
# SINGLE, DOUBLE, COMPLEX, ZCOMPLEX
set(FLOAT_TYPES "")

if (BUILD_SINGLE)
  list(APPEND FLOAT_TYPES "SINGLE") # defines nothing
  message(STATUS "Building Single Precision")
endif ()

if (BUILD_DOUBLE)
  list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE
  message(STATUS "Building Double Precision")
endif ()

if (BUILD_COMPLEX)
  list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX
  message(STATUS "Building Complex Precision")
endif ()

if (BUILD_COMPLEX16)
  list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
  message(STATUS "Building Double Complex Precision")
endif ()
set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench)
# all :: libs netlib tests shared
@ -62,8 +92,8 @@ endforeach ()
# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
set(TARGET_OBJS "")
foreach (DBLAS_OBJ ${DBLAS_OBJS})
get_target_property(PREV_DEFS ${DBLAS_OBJ} COMPILE_DEFINITIONS)
set_target_properties(${DBLAS_OBJ} PROPERTIES COMPILE_DEFINITIONS "${PREV_DEFS};DOUBLE")
#get_target_property(PREV_DEFS ${DBLAS_OBJ} COMPILE_DEFINITIONS)
#set_target_properties(${DBLAS_OBJ} PROPERTIES COMPILE_DEFINITIONS "${PREV_DEFS};DOUBLE")
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${DBLAS_OBJ}>")
endforeach ()

View File

@ -57,7 +57,7 @@ if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
# Ensure the correct stack alignment on Win32
# http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
if (${ARCH} STREQUAL "x86")
if (NOT MSVC)
if (NOT MSVC AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2")
endif ()
set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2")

View File

@ -32,7 +32,7 @@ if (DEFINED TARGET)
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
endif ()
if (${INTERFACE64})
if (INTERFACE64)
message(STATUS "Using 64-bit integers.")
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DUSE64BITINT")
endif ()
@ -43,12 +43,12 @@ endif ()
message(STATUS "GEMM multithread threshold set to ${GEMM_MULTITHREAD_THRESHOLD}.")
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DGEMM_MULTITHREAD_THRESHOLD=${GEMM_MULTITHREAD_THRESHOLD}")
if (${NO_AVX})
if (NO_AVX)
message(STATUS "Disabling Advanced Vector Extensions (AVX).")
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX")
endif ()
if (${NO_AVX2})
if (NO_AVX2)
message(STATUS "Disabling Advanced Vector Extensions 2 (AVX2).")
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2")
endif ()

View File

@ -64,85 +64,98 @@ endfunction ()
# generates object files for each of the sources, using the BLAS naming scheme to pass the function name as a preprocessor definition
# @param sources_in the source files to build from
# @param float_type_in the float type to define for this build (e.g. SINGLE/DOUBLE/etc)
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
# e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax"
# @param replace_last_with replaces the last character in the filename with this string (e.g. symm_k should be symm_TU)
# @param append_with appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters)
# @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
function(GenerateNamedObjects sources_in)
if (DEFINED ARGV1)
set(float_type_in ${ARGV1})
set(defines_in ${ARGV1})
endif ()
if (DEFINED ARGV2)
set(defines_in ${ARGV2})
set(name_in ${ARGV2})
endif ()
if (DEFINED ARGV3)
set(name_in ${ARGV3})
endif ()
if (DEFINED ARGV4)
set(use_cblas ${ARGV4})
set(use_cblas ${ARGV3})
else ()
set(use_cblas 0)
endif ()
if (DEFINED ARGV4)
set(replace_last_with ${ARGV4})
endif ()
if (DEFINED ARGV5)
set(replace_last_with ${ARGV5})
set(append_with ${ARGV5})
endif ()
if (DEFINED ARGV6)
set(append_with ${ARGV6})
set(no_float_type ${ARGV6})
else ()
set(no_float_type false)
endif ()
if (no_float_type)
set(float_list "DUMMY") # still need to loop once
else ()
set(float_list "${FLOAT_TYPES}")
endif ()
set(OBJ_LIST_OUT "")
foreach (source_file ${sources_in})
foreach (float_type ${float_list})
foreach (source_file ${sources_in})
if (DEFINED float_type_in AND NOT float_type_in STREQUAL "")
string(SUBSTRING ${float_type_in} 0 1 float_char)
string(TOLOWER ${float_char} float_char)
endif ()
if (NOT name_in)
get_filename_component(source_name ${source_file} NAME_WE)
set(obj_name "${float_char}${source_name}")
else ()
# replace * with float_char
if (${name_in} MATCHES "\\*")
string(REPLACE "*" ${float_char} obj_name ${name_in})
else ()
set(obj_name "${float_char}${name_in}")
if (NOT no_float_type)
string(SUBSTRING ${float_type} 0 1 float_char)
string(TOLOWER ${float_char} float_char)
endif ()
endif ()
if (replace_last_with)
string(REGEX REPLACE ".$" ${replace_last_with} obj_name ${obj_name})
else ()
set(obj_name "${obj_name}${append_with}")
endif ()
if (NOT name_in)
get_filename_component(source_name ${source_file} NAME_WE)
set(obj_name "${float_char}${source_name}")
else ()
# replace * with float_char
if (${name_in} MATCHES "\\*")
string(REPLACE "*" ${float_char} obj_name ${name_in})
else ()
set(obj_name "${float_char}${name_in}")
endif ()
endif ()
# now add the object and set the defines
set(obj_defines ${defines_in})
if (replace_last_with)
string(REGEX REPLACE ".$" ${replace_last_with} obj_name ${obj_name})
else ()
set(obj_name "${obj_name}${append_with}")
endif ()
if (use_cblas)
set(obj_name "cblas_${obj_name}")
list(APPEND obj_defines "CBLAS")
endif ()
# now add the object and set the defines
set(obj_defines ${defines_in})
list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")
list(APPEND obj_defines ${defines_in})
if (NOT ${float_type_in} STREQUAL "SINGLE")
list(APPEND obj_defines ${float_type_in})
endif ()
if (use_cblas)
set(obj_name "cblas_${obj_name}")
list(APPEND obj_defines "CBLAS")
endif ()
add_library(${obj_name} OBJECT ${source_file})
set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${obj_defines}")
list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")
list(APPEND obj_defines ${defines_in})
if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX")
list(APPEND obj_defines "DOUBLE")
endif ()
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
list(APPEND obj_defines "COMPLEX")
endif ()
list(APPEND OBJ_LIST_OUT ${obj_name})
add_library(${obj_name} OBJECT ${source_file})
set_target_properties(${obj_name} PROPERTIES COMPILE_DEFINITIONS "${obj_defines}")
list(APPEND OBJ_LIST_OUT ${obj_name})
endforeach ()
endforeach ()
list(APPEND DBLAS_OBJS ${OBJ_LIST_OUT})
@ -152,7 +165,6 @@ endfunction ()
# generates object files for each of the sources for each of the combinations of the preprocessor definitions passed in
# @param sources_in the source files to build from
# @param defines_in the preprocessor definitions that will be combined to create the object files
# @param float_type_in the float type to define for this build (e.g. SINGLE/DOUBLE/etc)
# @param all_defines_in (optional) preprocessor definitions that will be applied to all objects
# @param replace_scheme If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k.c with TRANS and UNIT defined will be symm_TU.
# If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU.
@ -160,10 +172,15 @@ endfunction ()
# If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects).
# If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel
# @param alternate_name replaces the source name as the object name (define codes are still appended)
function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_type_in all_defines_in replace_scheme)
# @param no_float_type turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme)
if (DEFINED ARGV5)
set(alternate_name ${ARGV5})
endif ()
if (DEFINED ARGV6)
set(alternate_name ${ARGV6})
set(no_float_type ${ARGV6})
endif ()
AllCombinations("${defines_in}" "${absent_codes_in}")
@ -223,7 +240,7 @@ function(GenerateCombinationObjects sources_in defines_in absent_codes_in float_
endif ()
endif ()
GenerateNamedObjects("${source_file}" "${float_type_in}" "${cur_defines}" "${alternate_name}" 0 "${replace_code}" "${append_code}")
GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" 0 "${replace_code}" "${append_code}" "${no_float_type}")
list(APPEND COMBO_OBJ_LIST_OUT "${OBJ_LIST_OUT}")
endforeach ()
endforeach ()

View File

@ -28,26 +28,26 @@ set(NU_SOURCES
)
# objects that need LOWER set
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "DOUBLE" "" 1)
GenerateCombinationObjects("${UL_SOURCES}" "LOWER" "U" "" 1)
# objects that need TRANSA and UNIT set
# N.B. BLAS wants to put the U/L from the filename in the *MIDDLE* because of course why not have a different naming scheme for every single object -hpa
GenerateCombinationObjects("${NU_SOURCES}" "TRANSA;UNIT" "N;N" "DOUBLE" "" 3)
GenerateCombinationObjects("${NU_SOURCES}" "TRANSA;UNIT" "N;N" "" 3)
# gbmv uses a lowercase n and t. WHY? WHO KNOWS!
GenerateNamedObjects("gbmv_k.c" "DOUBLE" "" "gbmv_n")
GenerateNamedObjects("gbmv_k.c" "DOUBLE" "TRANS" "gbmv_t")
GenerateNamedObjects("gbmv_k.c" "" "gbmv_n")
GenerateNamedObjects("gbmv_k.c" "TRANS" "gbmv_t")
if (SMP)
# gbmv uses a lowercase n and t. N.B. this uses TRANSA where gbmv.c uses TRANS. Intentional?
GenerateNamedObjects("gbmv_thread.c" "DOUBLE" "" "gbmv_thread_n")
GenerateNamedObjects("gbmv_thread.c" "DOUBLE" "TRANSA" "gbmv_thread_t")
GenerateNamedObjects("gbmv_thread.c" "" "gbmv_thread_n")
GenerateNamedObjects("gbmv_thread.c" "TRANSA" "gbmv_thread_t")
GenerateNamedObjects("gemv_thread.c" "DOUBLE" "" "gemv_thread_n")
GenerateNamedObjects("gemv_thread.c" "DOUBLE" "TRANSA" "gemv_thread_t")
GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n")
GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t")
GenerateNamedObjects("ger_thread.c" "DOUBLE")
GenerateNamedObjects("ger_thread.c")
set(UL_SMP_SOURCES
symv_thread.c
@ -59,7 +59,7 @@ if (SMP)
sbmv_thread.c
)
GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "DOUBLE" "" 2)
GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2)
set(NU_SMP_SOURCES
trmv_thread.c
@ -67,7 +67,7 @@ if (SMP)
tbmv_thread.c
)
GenerateCombinationObjects("${NU_SMP_SOURCES}" "TRANSA;LOWER;UNIT" "N;U;N" "DOUBLE" "" 2)
GenerateCombinationObjects("${NU_SMP_SOURCES}" "TRANSA;LOWER;UNIT" "N;U;N" "" 2)
endif ()

View File

@ -26,25 +26,25 @@ endif ()
set(GEMM_DEFINES NN NT TN TT)
foreach (GEMM_DEFINE ${GEMM_DEFINES})
string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC)
GenerateNamedObjects("gemm.c" "DOUBLE" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0)
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0)
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
GenerateNamedObjects("gemm.c" "DOUBLE" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0)
GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0)
endif ()
endforeach ()
GenerateCombinationObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "N;L;N" "DOUBLE" "" 0)
GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "DOUBLE" "NN" 1)
GenerateCombinationObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "U;N" "DOUBLE" "" 1)
GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "U" "DOUBLE" "" 2)
GenerateCombinationObjects("trmm_L.c;trmm_R.c;trsm_L.c;trsm_R.c" "TRANS;UPPER;UNIT" "N;L;N" "" 0)
GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "NN" 1)
GenerateCombinationObjects("syrk_k.c;syr2k_k.c" "LOWER;TRANS" "U;N" "" 1)
GenerateCombinationObjects("syrk_kernel.c;syr2k_kernel.c" "LOWER" "U" "" 2)
if (SMP)
# N.B. these do NOT have a float type (e.g. DOUBLE) defined!
GenerateNamedObjects("gemm_thread_m.c;gemm_thread_n.c;gemm_thread_mn.c;gemm_thread_variable.c;syrk_thread.c" "" "" "" 0)
GenerateNamedObjects("gemm_thread_m.c;gemm_thread_n.c;gemm_thread_mn.c;gemm_thread_variable.c;syrk_thread.c" "" "" 0 "" "" 1)
if (NOT USE_SIMPLE_THREADED_LEVEL3)
GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "DOUBLE" "THREADED_LEVEL3" 2 "syrk_thread")
GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "DOUBLE" "THREADED_LEVEL3;NN" 2 "symm_thread")
GenerateCombinationObjects("syrk_k.c" "LOWER;TRANS" "U;N" "THREADED_LEVEL3" 2 "syrk_thread")
GenerateCombinationObjects("symm_k.c" "RSIDE;LOWER" "L;U" "THREADED_LEVEL3;NN" 2 "symm_thread")
endif ()
endif ()

View File

@ -35,10 +35,10 @@ set(COMMON_SOURCES
openblas_error_handle.c
)
# these need to have NAME/CNAME set, so use GenerateNamedObjects
GenerateNamedObjects("abs.c" "" "" "c_abs")
GenerateNamedObjects("abs.c" "" "DOUBLE" "z_abs")
GenerateNamedObjects("openblas_get_config.c;openblas_get_parallel.c")
# these need to have NAME/CNAME set, so use GenerateNamedObjects, but don't use standard name mangling
GenerateNamedObjects("abs.c" "" "c_abs" 0 "" "" 1 )
GenerateNamedObjects("abs.c" "DOUBLE" "z_abs" 0 "" "" 1)
GenerateNamedObjects("openblas_get_config.c;openblas_get_parallel.c" "" "" 0 "" "" 1)
if (DYNAMIC_ARCH)
list(APPEND COMMON_SOURCES dynamic.c)

View File

@ -40,23 +40,23 @@ endif ()
foreach (CBLAS_FLAG ${CBLAS_FLAGS})
GenerateNamedObjects("${BLAS1_SOURCES}" "DOUBLE" "" "" ${CBLAS_FLAG})
GenerateNamedObjects("${BLAS2_SOURCES}" "DOUBLE" "" "" ${CBLAS_FLAG})
GenerateNamedObjects("${BLAS3_SOURCES}" "DOUBLE" "" "" ${CBLAS_FLAG})
GenerateNamedObjects("${BLAS1_SOURCES}" "" "" ${CBLAS_FLAG})
GenerateNamedObjects("${BLAS2_SOURCES}" "" "" ${CBLAS_FLAG})
GenerateNamedObjects("${BLAS3_SOURCES}" "" "" ${CBLAS_FLAG})
# trmm is trsm with a compiler flag set
GenerateNamedObjects("trsm.c" "DOUBLE" "TRMM" "trmm" ${CBLAS_FLAG})
GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG})
# max and imax are compiled 4 times
GenerateNamedObjects("max.c" "DOUBLE" "" "" ${CBLAS_FLAG})
GenerateNamedObjects("max.c" "DOUBLE" "USE_ABS" "amax" ${CBLAS_FLAG})
GenerateNamedObjects("max.c" "DOUBLE" "USE_ABS;USE_MIN" "amin" ${CBLAS_FLAG})
GenerateNamedObjects("max.c" "DOUBLE" "USE_MIN" "min" ${CBLAS_FLAG})
GenerateNamedObjects("max.c" "" "" ${CBLAS_FLAG})
GenerateNamedObjects("max.c" "USE_ABS" "amax" ${CBLAS_FLAG})
GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "amin" ${CBLAS_FLAG})
GenerateNamedObjects("max.c" "USE_MIN" "min" ${CBLAS_FLAG})
GenerateNamedObjects("imax.c" "DOUBLE" "" "i*max" ${CBLAS_FLAG})
GenerateNamedObjects("imax.c" "DOUBLE" "USE_ABS" "i*amax" ${CBLAS_FLAG})
GenerateNamedObjects("imax.c" "DOUBLE" "USE_ABS;USE_MIN" "i*amin" ${CBLAS_FLAG})
GenerateNamedObjects("imax.c" "DOUBLE" "USE_MIN" "i*min" ${CBLAS_FLAG})
GenerateNamedObjects("imax.c" "" "i*max" ${CBLAS_FLAG})
GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" ${CBLAS_FLAG})
GenerateNamedObjects("imax.c" "USE_ABS;USE_MIN" "i*amin" ${CBLAS_FLAG})
GenerateNamedObjects("imax.c" "USE_MIN" "i*min" ${CBLAS_FLAG})
endforeach ()
@ -66,7 +66,7 @@ if (NOT DEFINED NO_LAPACK)
lapack/potf2.c lapack/laswp.c lapack/gesv.c lapack/lauu2.c
lapack/lauum.c lapack/trti2.c lapack/trtri.c
)
GenerateNamedObjects("${LAPACK_SOURCES}" "DOUBLE" "" "" 0)
GenerateNamedObjects("${LAPACK_SOURCES}")
endif ()
set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS

View File

@ -11,9 +11,10 @@ endif ()
set(LSAME_KERNEL lsame.S)
set(SCABS_KERNEL cabs.S)
set(DCABS_KERNEL cabs.S)
GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "" "F_INTERFACE" "lsame")
GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "SINGLE" "COMPLEX;F_INTERFACE" "cabs1")
GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE" "COMPLEX;F_INTERFACE" "cabs1")
# don't use float type name mangling here: the trailing 1 is no_float_type, so each object is
# generated exactly once under the literal name given instead of once per entry in FLOAT_TYPES.
# The arguments are positional (sources, defines, name, use_cblas, replace_last_with, append_with,
# no_float_type), so all of them must be spelled out for the final flag to land in the right slot.
GenerateNamedObjects("${KERNELDIR}/${LSAME_KERNEL}" "F_INTERFACE" "lsame" 0 "" "" 1)
GenerateNamedObjects("${KERNELDIR}/${SCABS_KERNEL}" "COMPLEX;F_INTERFACE" "scabs1" 0 "" "" 1)
GenerateNamedObjects("${KERNELDIR}/${DCABS_KERNEL}" "DOUBLE;COMPLEX;F_INTERFACE" "dcabs1" 0 "" "" 1)
# Makefile.L1
@ -37,30 +38,30 @@ set(DSCALKERNEL scal.S)
set(DSWAPKERNEL swap.S)
set(DAXPBYKERNEL ../arm/axpby.c)
GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "DOUBLE" "USE_ABS" "amax_k")
GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "DOUBLE" "USE_ABS;USE_MIN" "amin_k")
GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "DOUBLE" "" "max_k")
GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "DOUBLE" "" "min_k")
GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "DOUBLE" "USE_ABS" "i*amax_k")
GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "DOUBLE" "USE_ABS;USE_MIN" "i*amin_k")
GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "DOUBLE" "" "i*max_k")
GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "DOUBLE" "" "i*min_k")
GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "DOUBLE" "" "asum_k")
GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "DOUBLE" "" "axpy_k")
GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "DOUBLE" "C_INTERFACE" "copy_k")
GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "DOUBLE" "" "dot_k")
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "DOUBLE" "" "nrm2_k")
GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "DOUBLE" "" "rot_k")
GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "DOUBLE" "" "scal_k")
GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "DOUBLE" "" "swap_k")
GenerateNamedObjects("${KERNELDIR}/${DAXPBYKERNEL}" "DOUBLE" "" "axpby_k")
GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k")
GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k")
GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k")
GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "" "min_k")
GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k")
GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k")
GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k")
GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "" "i*min_k")
GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k")
GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k")
GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k")
GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k")
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k")
GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k")
GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k")
GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k")
GenerateNamedObjects("${KERNELDIR}/${DAXPBYKERNEL}" "" "axpby_k")
# Makefile.L2
# The second argument of GenerateNamedObjects is now the list of extra defines, and the float type
# defines (DOUBLE/COMPLEX) are added per precision inside the function. Passing "DOUBLE" here would
# force DOUBLE onto every precision, so no extra defines are passed.
GenerateNamedObjects("${KERNELDIR}/gemv_n.S")
GenerateNamedObjects("${KERNELDIR}/gemv_t.S" "DOUBLE" "TRANS")
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "DOUBLE" "" 1)
GenerateNamedObjects("generic/ger.c" "DOUBLE" "" "ger_k")
GenerateNamedObjects("${KERNELDIR}/gemv_t.S" "TRANS")
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1)
GenerateNamedObjects("generic/ger.c" "" "ger_k")
# Makefile.L3
@ -76,78 +77,78 @@ set(DGEMMITCOPYOBJ gemm_itcopy)
set(DGEMMONCOPYOBJ gemm_oncopy)
set(DGEMMOTCOPYOBJ gemm_otcopy)
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "" "gemm_kernel")
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel")
if (DGEMMINCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "" "${DGEMMINCOPYOBJ}")
GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "" "${DGEMMINCOPYOBJ}")
endif ()
if (DGEMMITCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "" "${DGEMMITCOPYOBJ}")
GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "" "${DGEMMITCOPYOBJ}")
endif ()
if (DGEMMONCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "" "${DGEMMONCOPYOBJ}")
GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "" "${DGEMMONCOPYOBJ}")
endif ()
if (DGEMMOTCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "" "${DGEMMOTCOPYOBJ}")
GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "" "${DGEMMOTCOPYOBJ}")
endif ()
GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "DOUBLE")
GenerateCombinationObjects("${KERNELDIR}/${DGEMMKERNEL}" "LEFT;TRANSA" "R;N" "DOUBLE" "TRMMKERNEL" 2 "trmm_kernel")
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN")
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "LT;TRSMKERNEL" "trsm_kernel_LT")
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN")
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "DOUBLE" "RT;TRSMKERNEL" "trsm_kernel_RT")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}")
GenerateCombinationObjects("${KERNELDIR}/${DGEMMKERNEL}" "LEFT;TRANSA" "R;N" "TRMMKERNEL" 2 "trmm_kernel")
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN")
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "LT;TRSMKERNEL" "trsm_kernel_LT")
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN")
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "RT;TRSMKERNEL" "trsm_kernel_RT")
# These don't use a scheme that is easy to iterate over - the filenames have part of the DEFINE codes in them, for UPPER/TRANS but not for UNIT/OUTER. Also TRANS is not passed in as a define.
# Could simplify it a bit by pairing up by -UUNIT/-DUNIT.
GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trmm_iunucopy")
GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trmm_iunncopy")
GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trmm_ounucopy")
GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trmm_ounncopy")
GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy")
GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "" "trmm_iunncopy")
GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "OUTER;UNIT" "trmm_ounucopy")
GenerateNamedObjects("generic/trmm_uncopy_${DGEMM_UNROLL_M}.c" "OUTER" "trmm_ounncopy")
GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trmm_ilnucopy")
GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trmm_ilnncopy")
GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trmm_olnucopy")
GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trmm_olnncopy")
GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy")
GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy")
GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy")
GenerateNamedObjects("generic/trmm_lncopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER" "trmm_olnncopy")
GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trmm_iutucopy")
GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trmm_iutncopy")
GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trmm_outucopy")
GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trmm_outncopy")
GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy")
GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "" "trmm_iutncopy")
GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "OUTER;UNIT" "trmm_outucopy")
GenerateNamedObjects("generic/trmm_utcopy_${DGEMM_UNROLL_M}.c" "OUTER" "trmm_outncopy")
GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trmm_iltucopy")
GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trmm_iltncopy")
GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trmm_oltucopy")
GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trmm_oltncopy")
GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy")
GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy")
GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy")
GenerateNamedObjects("generic/trmm_ltcopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER" "trmm_oltncopy")
GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trsm_iunucopy")
GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trsm_iunncopy")
GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trsm_ounucopy")
GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trsm_ounncopy")
GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy")
GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "" "trsm_iunncopy")
GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "OUTER;UNIT" "trsm_ounucopy")
GenerateNamedObjects("generic/trsm_uncopy_${DGEMM_UNROLL_M}.c" "OUTER" "trsm_ounncopy")
GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trsm_ilnucopy")
GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trsm_ilnncopy")
GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trsm_olnucopy")
GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trsm_olnncopy")
GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy")
GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy")
GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy")
GenerateNamedObjects("generic/trsm_lncopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER" "trsm_olnncopy")
GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "UNIT" "trsm_iutucopy")
GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "" "trsm_iutncopy")
GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;UNIT" "trsm_outucopy")
GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER" "trsm_outncopy")
GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy")
GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "" "trsm_iutncopy")
GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "OUTER;UNIT" "trsm_outucopy")
GenerateNamedObjects("generic/trsm_utcopy_${DGEMM_UNROLL_M}.c" "OUTER" "trsm_outncopy")
GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER;UNIT" "trsm_iltucopy")
GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "LOWER" "trsm_iltncopy")
GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER;UNIT" "trsm_oltucopy")
GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "DOUBLE" "OUTER;LOWER" "trsm_oltncopy")
GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy")
GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy")
GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy")
GenerateNamedObjects("generic/trsm_ltcopy_${DGEMM_UNROLL_M}.c" "OUTER;LOWER" "trsm_oltncopy")
GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "OUTER" "symm_outcopy")
GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "" "symm_iutcopy")
GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "OUTER" "symm_outcopy")
GenerateNamedObjects("generic/symm_ucopy_${DGEMM_UNROLL_N}.c" "" "symm_iutcopy")
GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "LOWER;OUTER" "symm_oltcopy")
GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "DOUBLE" "LOWER" "symm_iltcopy")
GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy")
GenerateNamedObjects("generic/symm_lcopy_${DGEMM_UNROLL_N}.c" "LOWER" "symm_iltcopy")
if (NOT DEFINED DOMATCOPY_CN)
set(DOMATCOPY_CN ../arm/omatcopy_cn.c)
@ -162,10 +163,10 @@ if (NOT DEFINED DOMATCOPY_RT)
set(DOMATCOPY_RT ../arm/omatcopy_rt.c)
endif ()
GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CN}" "DOUBLE" "" "domatcopy_k_cn")
GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RN}" "DOUBLE" "ROWM" "domatcopy_k_rn")
GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CT}" "DOUBLE" "" "domatcopy_k_ct")
GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RT}" "DOUBLE" "ROWM" "domatcopy_k_rt")
# GenerateNamedObjects prepends the float character to the name when it contains no "*", so the name
# is passed without its precision prefix ("omatcopy_k_cn" becomes "domatcopy_k_cn" for DOUBLE).
# Passing "domatcopy_k_cn" would produce a doubled prefix such as "ddomatcopy_k_cn".
GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CN}" "" "omatcopy_k_cn")
GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RN}" "ROWM" "omatcopy_k_rn")
GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_CT}" "" "omatcopy_k_ct")
GenerateNamedObjects("${KERNELDIR}/${DOMATCOPY_RT}" "ROWM" "omatcopy_k_rt")
# Makefile.LA
#DBLASOBJS += dneg_tcopy$(TSUFFIX).$(SUFFIX) dlaswp_ncopy$(TSUFFIX).$(SUFFIX)

View File

@ -50,11 +50,11 @@ set(ZLAPACK_SOURCES
trtri/trtri_L_single.c
)
GenerateNamedObjects("${LAPACK_SOURCES}" "DOUBLE")
GenerateNamedObjects("${LAPACK_SOURCES}")
# TODO: laswp needs arch specific code
GenerateNamedObjects("laswp/generic/laswp_k.c" "DOUBLE" "" "laswp_plus")
GenerateNamedObjects("laswp/generic/laswp_k.c" "DOUBLE" "MINUS" "laswp_minus")
GenerateNamedObjects("laswp/generic/laswp_k.c" "" "laswp_plus")
GenerateNamedObjects("laswp/generic/laswp_k.c" "MINUS" "laswp_minus")
if (SMP)
@ -92,12 +92,12 @@ if (SMP)
trtri/trtri_L_parallel.c
)
GenerateNamedObjects("${PARALLEL_SOURCES}" "DOUBLE" "" "" 0)
GenerateNamedObjects("${PARALLEL_SOURCES}")
endif ()
GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "DOUBLE" "" 4)
GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "DOUBLE" "" 4)
GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "DOUBLE" "" 0)
GenerateCombinationObjects("${TRANS_SOURCES}" "TRANS" "N" "" 4)
GenerateCombinationObjects("${UNIT_SOURCES}" "UNIT" "N" "" 4)
GenerateCombinationObjects("${UNIT_SOURCES2}" "UNIT" "N" "" 0)
set(DBLAS_OBJS ${DBLAS_OBJS} PARENT_SCOPE) # list append removes the scope from DBLAS_OBJS