cmake fixes

This commit is contained in:
Rajalakshmi Srinivasaraghavan 2020-04-17 13:35:17 -05:00
parent 67cc4b9e16
commit 22bb50fb81
7 changed files with 287 additions and 6 deletions

View File

@ -89,6 +89,7 @@ endif ()
# set which float types we want to build for
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
# if none are defined, build for all
set(BUILD_HALF true)
set(BUILD_SINGLE true)
set(BUILD_DOUBLE true)
set(BUILD_COMPLEX true)
@ -120,6 +121,11 @@ if (BUILD_COMPLEX16)
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
endif ()
# Register the half-precision float type in FLOAT_TYPES.
# NOTE(review): the condition is also true when only BUILD_SINGLE is set, so
# "HALF" is appended for every single-precision build — confirm this coupling
# is intended.
if (BUILD_SINGLE OR BUILD_HALF)
message(STATUS "Building Half Precision")
list(APPEND FLOAT_TYPES "HALF") # GenerateNamedObjects appends a HALF define for this type
endif ()
# Abort configuration when no CPU core is known: CORE is either the literal
# "UNKNOWN" or was never set.
if ("${CORE}" STREQUAL "UNKNOWN" OR NOT DEFINED CORE)
  message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
endif ()

View File

@ -113,11 +113,29 @@ macro(SetDefaultL1)
set(ZSUMKERNEL zsum.S)
set(QSUMKERNEL sum.S)
set(XSUMKERNEL zsum.S)
set(SHAMINKERNEL ../arm/amin.c)
set(SHAMAXKERNEL amax.S)
set(SHMAXKERNEL ../arm/max.c)
set(SHMINKERNEL ../arm/min.c)
set(ISHAMAXKERNEL iamax.S)
set(ISHAMINKERNEL ../arm/iamin.c)
set(ISHMAXKERNEL ../arm/imax.c)
set(ISHMINKERNEL ../arm/imin.c)
set(SHASUMKERNEL asum.S)
set(SHAXPYKERNEL axpy.S)
set(SHAXPBYKERNEL ../arm/axpby.c)
set(SHCOPYKERNEL copy.S)
set(SHDOTKERNEL dot.S)
set(SHROTKERNEL rot.S)
set(SHSCALKERNEL scal.S)
set(SHNRM2KERNEL nrm2.S)
set(SHSUMKERNEL sum.S)
set(SHSWAPKERNEL swap.S)
endmacro ()
macro(SetDefaultL2)
set(SGEMVNKERNEL gemv_n.S)
set(SGEMVTKERNEL gemv_t.S)
set(SGEMVNKERNEL ../arm/gemv_n.c)
set(SGEMVTKERNEL ../arm/gemv_t.c)
set(DGEMVNKERNEL gemv_n.S)
set(DGEMVTKERNEL gemv_t.S)
set(CGEMVNKERNEL zgemv_n.S)
@ -161,6 +179,10 @@ macro(SetDefaultL2)
set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
set(SHGEMVNKERNEL ../arm/gemv_n.c)
set(SHGEMVTKERNEL ../arm/gemv_t.c)
set(SHGERKERNEL ../generic/ger.c)
endmacro ()
macro(SetDefaultL3)
@ -168,4 +190,17 @@ macro(SetDefaultL3)
set(DGEADD_KERNEL ../generic/geadd.c)
set(CGEADD_KERNEL ../generic/zgeadd.c)
set(ZGEADD_KERNEL ../generic/zgeadd.c)
set(SHGEADD_KERNEL ../generic/geadd.c)
set(SHGEMMKERNEL ../generic/gemmkernel_2x2.c)
set(SHGEMM_BETA ../generic/gemm_beta.c)
set(SHGEMMINCOPY ../generic/gemm_ncopy_2.c)
set(SHGEMMITCOPY ../generic/gemm_tcopy_2.c)
set(SHGEMMONCOPY ../generic/gemm_ncopy_2.c)
set(SHGEMMOTCOPY ../generic/gemm_tcopy_2.c)
set(SHGEMMINCOPYOBJ shgemm_incopy.o)
set(SHGEMMITCOPYOBJ shgemm_itcopy.o)
set(SHGEMMONCOPYOBJ shgemm_oncopy.o)
set(SHGEMMOTCOPYOBJ shgemm_otcopy.o)
endmacro ()

View File

@ -163,6 +163,7 @@ function(GenerateNamedObjects sources_in)
if (complex_only)
list(REMOVE_ITEM float_list "SINGLE")
list(REMOVE_ITEM float_list "DOUBLE")
list(REMOVE_ITEM float_list "HALF")
elseif (real_only)
list(REMOVE_ITEM float_list "COMPLEX")
list(REMOVE_ITEM float_list "ZCOMPLEX")
@ -176,6 +177,9 @@ function(GenerateNamedObjects sources_in)
if (NOT no_float_type)
string(SUBSTRING ${float_type} 0 1 float_char)
string(TOLOWER ${float_char} float_char)
if (${float_type} STREQUAL "HALF")
set (float_char "sh")
endif ()
endif ()
if (NOT name_in)
@ -210,6 +214,9 @@ function(GenerateNamedObjects sources_in)
if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX")
list(APPEND obj_defines "DOUBLE")
endif ()
if (${float_type} STREQUAL "HALF")
list(APPEND obj_defines "HALF")
endif ()
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
list(APPEND obj_defines "COMPLEX")
if (mangle_complex_sources)

View File

@ -646,6 +646,19 @@
#elif defined(HALF)
#define AXPYU_K SAXPYU_K
#define AXPYC_K SAXPYC_K
#define SCAL_K SSCAL_K
#define GEMV_N SGEMV_N
#define GEMV_T SGEMV_T
#define SYMV_U SSYMV_U
#define SYMV_L SSYMV_L
#define GERU_K SGERU_K
#define GERC_K SGERC_K
#define GERV_K SGERV_K
#define GERD_K SGERD_K
#define SYMV_THREAD_U SSYMV_THREAD_U
#define SYMV_THREAD_L SSYMV_THREAD_L
#define GEMM_BETA SHGEMM_BETA
#define GEMM_KERNEL_N SHGEMM_KERNEL
#define GEMM_KERNEL_L SHGEMM_KERNEL
@ -672,6 +685,20 @@
#define GEMM_OTCOPY SHGEMM_OTCOPY
#define GEMM_INCOPY SHGEMM_INCOPY
#define GEMM_ITCOPY SHGEMM_ITCOPY
#define SYMM_THREAD_LU SSYMM_THREAD_LU
#define SYMM_THREAD_LL SSYMM_THREAD_LL
#define SYMM_THREAD_RU SSYMM_THREAD_RU
#define SYMM_THREAD_RL SSYMM_THREAD_RL
#define SYMM_LU SSYMM_LU
#define SYMM_LL SSYMM_LL
#define SYMM_RU SSYMM_RU
#define SYMM_RL SSYMM_RL
#define HEMM_THREAD_LU SHEMM_THREAD_LU
#define HEMM_THREAD_LL SHEMM_THREAD_LL
#define HEMM_THREAD_RU SHEMM_THREAD_RU
#define HEMM_THREAD_RL SHEMM_THREAD_RL
#define GEMM_THREAD_NN SHGEMM_THREAD_NN
#define GEMM_THREAD_CN SHGEMM_THREAD_TN
@ -690,6 +717,186 @@
#define GEMM_THREAD_RC SHGEMM_THREAD_NT
#define GEMM_THREAD_RR SHGEMM_THREAD_NN
/*
 * Triangular copy-routine aliases. Only these depend on UNIT: with UNIT
 * defined they map to the S*...UCOPY names, otherwise to the S*...NCOPY names.
 */
#ifdef UNIT
#define TRMM_OUNCOPY STRMM_OUNUCOPY
#define TRMM_OUTCOPY STRMM_OUTUCOPY
#define TRMM_OLNCOPY STRMM_OLNUCOPY
#define TRMM_OLTCOPY STRMM_OLTUCOPY
#define TRSM_OUNCOPY STRSM_OUNUCOPY
#define TRSM_OUTCOPY STRSM_OUTUCOPY
#define TRSM_OLNCOPY STRSM_OLNUCOPY
#define TRSM_OLTCOPY STRSM_OLTUCOPY
#define TRMM_IUNCOPY STRMM_IUNUCOPY
#define TRMM_IUTCOPY STRMM_IUTUCOPY
#define TRMM_ILNCOPY STRMM_ILNUCOPY
#define TRMM_ILTCOPY STRMM_ILTUCOPY
#define TRSM_IUNCOPY STRSM_IUNUCOPY
#define TRSM_IUTCOPY STRSM_IUTUCOPY
#define TRSM_ILNCOPY STRSM_ILNUCOPY
#define TRSM_ILTCOPY STRSM_ILTUCOPY
#else
#define TRMM_OUNCOPY STRMM_OUNNCOPY
#define TRMM_OUTCOPY STRMM_OUTNCOPY
#define TRMM_OLNCOPY STRMM_OLNNCOPY
#define TRMM_OLTCOPY STRMM_OLTNCOPY
#define TRSM_OUNCOPY STRSM_OUNNCOPY
#define TRSM_OUTCOPY STRSM_OUTNCOPY
#define TRSM_OLNCOPY STRSM_OLNNCOPY
#define TRSM_OLTCOPY STRSM_OLTNCOPY
#define TRMM_IUNCOPY STRMM_IUNNCOPY
#define TRMM_IUTCOPY STRMM_IUTNCOPY
#define TRMM_ILNCOPY STRMM_ILNNCOPY
#define TRMM_ILTCOPY STRMM_ILTNCOPY
#define TRSM_IUNCOPY STRSM_IUNNCOPY
#define TRSM_IUTCOPY STRSM_IUTNCOPY
#define TRSM_ILNCOPY STRSM_ILNNCOPY
#define TRSM_ILTCOPY STRSM_ILTNCOPY
#endif
/*
 * The aliases below have no unit/non-unit variants, so they are defined
 * regardless of UNIT. The conditional used to be closed only after the last
 * of them, which left all of these undefined whenever UNIT was defined.
 */
/* TRMM/TRSM kernels: the R and C variants reuse the N and T kernels. */
#define TRMM_KERNEL_LN STRMM_KERNEL_LN
#define TRMM_KERNEL_LT STRMM_KERNEL_LT
#define TRMM_KERNEL_LR STRMM_KERNEL_LN
#define TRMM_KERNEL_LC STRMM_KERNEL_LT
#define TRMM_KERNEL_RN STRMM_KERNEL_RN
#define TRMM_KERNEL_RT STRMM_KERNEL_RT
#define TRMM_KERNEL_RR STRMM_KERNEL_RN
#define TRMM_KERNEL_RC STRMM_KERNEL_RT
#define TRSM_KERNEL_LN STRSM_KERNEL_LN
#define TRSM_KERNEL_LT STRSM_KERNEL_LT
#define TRSM_KERNEL_LR STRSM_KERNEL_LN
#define TRSM_KERNEL_LC STRSM_KERNEL_LT
#define TRSM_KERNEL_RN STRSM_KERNEL_RN
#define TRSM_KERNEL_RT STRSM_KERNEL_RT
#define TRSM_KERNEL_RR STRSM_KERNEL_RN
#define TRSM_KERNEL_RC STRSM_KERNEL_RT
/* SYMM copy routines. */
#define SYMM_IUTCOPY SSYMM_IUTCOPY
#define SYMM_ILTCOPY SSYMM_ILTCOPY
#define SYMM_OUTCOPY SSYMM_OUTCOPY
#define SYMM_OLTCOPY SSYMM_OLTCOPY
/* TRMM drivers: the R and C variants reuse the N and T drivers. */
#define TRMM_LNUU STRMM_LNUU
#define TRMM_LNUN STRMM_LNUN
#define TRMM_LNLU STRMM_LNLU
#define TRMM_LNLN STRMM_LNLN
#define TRMM_LTUU STRMM_LTUU
#define TRMM_LTUN STRMM_LTUN
#define TRMM_LTLU STRMM_LTLU
#define TRMM_LTLN STRMM_LTLN
#define TRMM_LRUU STRMM_LNUU
#define TRMM_LRUN STRMM_LNUN
#define TRMM_LRLU STRMM_LNLU
#define TRMM_LRLN STRMM_LNLN
#define TRMM_LCUU STRMM_LTUU
#define TRMM_LCUN STRMM_LTUN
#define TRMM_LCLU STRMM_LTLU
#define TRMM_LCLN STRMM_LTLN
#define TRMM_RNUU STRMM_RNUU
#define TRMM_RNUN STRMM_RNUN
#define TRMM_RNLU STRMM_RNLU
#define TRMM_RNLN STRMM_RNLN
#define TRMM_RTUU STRMM_RTUU
#define TRMM_RTUN STRMM_RTUN
#define TRMM_RTLU STRMM_RTLU
#define TRMM_RTLN STRMM_RTLN
#define TRMM_RRUU STRMM_RNUU
#define TRMM_RRUN STRMM_RNUN
#define TRMM_RRLU STRMM_RNLU
#define TRMM_RRLN STRMM_RNLN
#define TRMM_RCUU STRMM_RTUU
#define TRMM_RCUN STRMM_RTUN
#define TRMM_RCLU STRMM_RTLU
#define TRMM_RCLN STRMM_RTLN
/* TRSM drivers: the R and C variants reuse the N and T drivers. */
#define TRSM_LNUU STRSM_LNUU
#define TRSM_LNUN STRSM_LNUN
#define TRSM_LNLU STRSM_LNLU
#define TRSM_LNLN STRSM_LNLN
#define TRSM_LTUU STRSM_LTUU
#define TRSM_LTUN STRSM_LTUN
#define TRSM_LTLU STRSM_LTLU
#define TRSM_LTLN STRSM_LTLN
#define TRSM_LRUU STRSM_LNUU
#define TRSM_LRUN STRSM_LNUN
#define TRSM_LRLU STRSM_LNLU
#define TRSM_LRLN STRSM_LNLN
#define TRSM_LCUU STRSM_LTUU
#define TRSM_LCUN STRSM_LTUN
#define TRSM_LCLU STRSM_LTLU
#define TRSM_LCLN STRSM_LTLN
#define TRSM_RNUU STRSM_RNUU
#define TRSM_RNUN STRSM_RNUN
#define TRSM_RNLU STRSM_RNLU
#define TRSM_RNLN STRSM_RNLN
#define TRSM_RTUU STRSM_RTUU
#define TRSM_RTUN STRSM_RTUN
#define TRSM_RTLU STRSM_RTLU
#define TRSM_RTLN STRSM_RTLN
#define TRSM_RRUU STRSM_RNUU
#define TRSM_RRUN STRSM_RNUN
#define TRSM_RRLU STRSM_RNLU
#define TRSM_RRLN STRSM_RNLN
#define TRSM_RCUU STRSM_RTUU
#define TRSM_RCUN STRSM_RTUN
#define TRSM_RCLU STRSM_RTLU
#define TRSM_RCLN STRSM_RTLN
/* SYRK/HERK/SYR2K/HER2K: all names resolve to the SSYRK / SSYR2K routines. */
#define SYRK_UN SSYRK_UN
#define SYRK_UT SSYRK_UT
#define SYRK_LN SSYRK_LN
#define SYRK_LT SSYRK_LT
#define SYRK_UR SSYRK_UN
#define SYRK_UC SSYRK_UT
#define SYRK_LR SSYRK_LN
#define SYRK_LC SSYRK_LT
#define SYRK_KERNEL_U SSYRK_KERNEL_U
#define SYRK_KERNEL_L SSYRK_KERNEL_L
#define HERK_UN SSYRK_UN
#define HERK_LN SSYRK_LN
#define HERK_UC SSYRK_UT
#define HERK_LC SSYRK_LT
#define HER2K_UN SSYR2K_UN
#define HER2K_LN SSYR2K_LN
#define HER2K_UC SSYR2K_UT
#define HER2K_LC SSYR2K_LT
#define SYR2K_UN SSYR2K_UN
#define SYR2K_UT SSYR2K_UT
#define SYR2K_LN SSYR2K_LN
#define SYR2K_LT SSYR2K_LT
#define SYR2K_UR SSYR2K_UN
#define SYR2K_UC SSYR2K_UT
#define SYR2K_LR SSYR2K_LN
#define SYR2K_LC SSYR2K_LT
#define SYR2K_KERNEL_U SSYR2K_KERNEL_U
#define SYR2K_KERNEL_L SSYR2K_KERNEL_L
/*
 * Threaded SYRK/HERK entry points.
 * NOTE(review): the UR/UC names map to SSYRK_THREAD_UR/UC while LR/LC fold
 * onto LN/LT (and the non-threaded SYRK_UR/UC above fold onto UN/UT) —
 * confirm whether SSYRK_THREAD_UN/UT was intended. Left unchanged here.
 */
#define SYRK_THREAD_UN SSYRK_THREAD_UN
#define SYRK_THREAD_UT SSYRK_THREAD_UT
#define SYRK_THREAD_LN SSYRK_THREAD_LN
#define SYRK_THREAD_LT SSYRK_THREAD_LT
#define SYRK_THREAD_UR SSYRK_THREAD_UR
#define SYRK_THREAD_UC SSYRK_THREAD_UC
#define SYRK_THREAD_LR SSYRK_THREAD_LN
#define SYRK_THREAD_LC SSYRK_THREAD_LT
#define HERK_THREAD_UN SSYRK_THREAD_UN
#define HERK_THREAD_UT SSYRK_THREAD_UT
#define HERK_THREAD_LN SSYRK_THREAD_LN
#define HERK_THREAD_LT SSYRK_THREAD_LT
#define HERK_THREAD_UR SSYRK_THREAD_UR
#define HERK_THREAD_UC SSYRK_THREAD_UC
#define HERK_THREAD_LR SSYRK_THREAD_LN
#define HERK_THREAD_LC SSYRK_THREAD_LT
#else
#define AMAX_K SAMAX_K
@ -721,14 +928,14 @@
#define GEMV_S SGEMV_S
#define GEMV_D SGEMV_D
#define SYMV_U SSYMV_U
#define SYMV_L SSYMV_L
#define GERU_K SGERU_K
#define GERC_K SGERC_K
#define GERV_K SGERV_K
#define GERD_K SGERD_K
#define SYMV_U SSYMV_U
#define SYMV_L SSYMV_L
#define SYMV_THREAD_U SSYMV_THREAD_U
#define SYMV_THREAD_L SSYMV_THREAD_L

View File

@ -12,6 +12,9 @@ FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh
foreach(float_type ${FLOAT_TYPES})
string(SUBSTRING ${float_type} 0 1 float_char_upper)
string(TOLOWER ${float_char_upper} float_char)
if (${float_char} STREQUAL "h")
continue()
endif()
#level1
add_executable(x${float_char}cblat1
c_${float_char}blat1.f

View File

@ -41,6 +41,9 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
foreach (float_type ${FLOAT_TYPES})
# a bit of metaprogramming here to pull out the appropriate KERNEL var
string(SUBSTRING ${float_type} 0 1 float_char)
if (${float_type} STREQUAL "HALF")
set (float_char "SH")
endif ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type})
if (DEFINED ${float_char}MAXKERNEL)
@ -93,6 +96,9 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
foreach (float_type ${FLOAT_TYPES})
string(SUBSTRING ${float_type} 0 1 float_char)
if (${float_type} STREQUAL "HALF")
set (float_char "SH")
endif ()
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}GERCKERNEL}" "CONJ" "gerc_k" false "" "" false ${float_type})
@ -128,13 +134,19 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
set(USE_TRMM true)
endif ()
foreach (float_type SINGLE DOUBLE)
foreach (float_type SINGLE DOUBLE HALF)
string(SUBSTRING ${float_type} 0 1 float_char)
if (${float_type} STREQUAL "HALF")
set (float_char "SH")
endif ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})
endforeach()
foreach (float_type ${FLOAT_TYPES})
string(SUBSTRING ${float_type} 0 1 float_char)
if (${float_type} STREQUAL "HALF")
set (float_char "SH")
endif ()
if (${float_char}GEMMINCOPY)
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type})
endif ()
@ -470,9 +482,13 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type})
endforeach ()
# Makefile.LA
if(NOT NO_LAPACK)
foreach (float_type ${FLOAT_TYPES})
if (${float_type} STREQUAL "HALF")
set (float_char "SH")
endif ()
if (NOT DEFINED ${float_char}NEG_TCOPY)
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X")
set(${float_char}NEG_TCOPY ../generic/zneg_tcopy.c)
@ -516,6 +532,9 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
# For every enabled float type, generate the "neg_tcopy" and "laswp_ncopy"
# objects from the generic sources whose file name carries that type's
# GEMM unroll factor.
foreach (float_type ${FLOAT_TYPES})
  # a bit of metaprogramming here to pull out the appropriate KERNEL var:
  # the prefix is the first letter of the type name, except HALF, which uses
  # the two-letter prefix "SH".
  # The operand is quoted so if() compares the string itself rather than
  # re-evaluating an unquoted expansion as a variable name.
  if ("${float_type}" STREQUAL "HALF")
    set(float_char "SH")
  else ()
    string(SUBSTRING "${float_type}" 0 1 float_char)
  endif ()
  # ${${float_char}GEMM_UNROLL_M} / ..._N pick the source file for this type.
  GenerateNamedObjects("generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false ${float_type})
  GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type})
endforeach ()

View File

@ -2,6 +2,7 @@
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${PROJECT_BINARY_DIR})
list (REMOVE_ITEM FLOAT_TYPES "HALF")
set(LAPACK_SOURCES
potrf/potrf_U_single.c
@ -45,6 +46,9 @@ GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" "" fa
GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3)
# Generate the "getrf_single" objects from getrf/getrf_single.c (passing
# "UNIT") for every float type except HALF.
foreach (float_type ${FLOAT_TYPES})
  # An inverted guard replaces continue(), which requires CMake >= 3.2.
  # The operand is quoted so if() compares the string itself rather than
  # re-evaluating an unquoted expansion as a variable name.
  if (NOT "${float_type}" STREQUAL "HALF")
    GenerateNamedObjects("getrf/getrf_single.c" "UNIT" "getrf_single" false "" "" false ${float_type})
  endif ()
endforeach ()