diff --git a/cmake/kernel.cmake b/cmake/kernel.cmake index 79eeaae6f..7d7f5ffda 100644 --- a/cmake/kernel.cmake +++ b/cmake/kernel.cmake @@ -113,7 +113,7 @@ macro(SetDefaultL1) set(ZSUMKERNEL zsum.S) set(QSUMKERNEL sum.S) set(XSUMKERNEL zsum.S) -if (BUILD_HALF) +if (BUILD_BFLOAT16) set(SHAMINKERNEL ../arm/amin.c) set(SHAMAXKERNEL ../arm/amax.c) set(SHMAXKERNEL ../arm/max.c) @@ -126,7 +126,7 @@ if (BUILD_HALF) set(SHAXPYKERNEL ../arm/axpy.c) set(SHAXPBYKERNEL ../arm/axpby.c) set(SHCOPYKERNEL ../arm/copy.c) - set(SHDOTKERNEL ../x86_64/shdot.c) + set(SBDOTKERNEL ../x86_64/sbdot.c) set(SHROTKERNEL ../arm/rot.c) set(SHSCALKERNEL ../arm/scal.c) set(SHNRM2KERNEL ../arm/nrm2.c) @@ -183,9 +183,9 @@ macro(SetDefaultL2) set(XHEMV_L_KERNEL ../generic/zhemv_k.c) set(XHEMV_V_KERNEL ../generic/zhemv_k.c) set(XHEMV_M_KERNEL ../generic/zhemv_k.c) -if (BUILD_HALF) - set(SHGEMVNKERNEL ../arm/gemv_n.c) - set(SHGEMVTKERNEL ../arm/gemv_t.c) +if (BUILD_BFLOAT16) + set(SBGEMVNKERNEL ../arm/gemv_n.c) + set(SBGEMVTKERNEL ../arm/gemv_t.c) set(SHGERKERNEL ../generic/ger.c) endif () endmacro () @@ -195,18 +195,18 @@ macro(SetDefaultL3) set(DGEADD_KERNEL ../generic/geadd.c) set(CGEADD_KERNEL ../generic/zgeadd.c) set(ZGEADD_KERNEL ../generic/zgeadd.c) -if (BUILD_HALF) +if (BUILD_BFLOAT16) set(SHGEADD_KERNEL ../generic/geadd.c) - set(SHGEMMKERNEL ../generic/gemmkernel_2x2.c) - set(SHGEMM_BETA ../generic/gemm_beta.c) - set(SHGEMMINCOPY ../generic/gemm_ncopy_2.c) - set(SHGEMMITCOPY ../generic/gemm_tcopy_2.c) - set(SHGEMMONCOPY ../generic/gemm_ncopy_2.c) - set(SHGEMMOTCOPY ../generic/gemm_tcopy_2.c) - set(SHGEMMINCOPYOBJ shgemm_incopy.o) - set(SHGEMMITCOPYOBJ shgemm_itcopy.o) - set(SHGEMMONCOPYOBJ shgemm_oncopy.o) - set(SHGEMMOTCOPYOBJ shgemm_otcopy.o) + set(SBGEMMKERNEL ../generic/gemmkernel_2x2.c) + set(SBGEMM_BETA ../generic/gemm_beta.c) + set(SBGEMMINCOPY ../generic/gemm_ncopy_2.c) + set(SBGEMMITCOPY ../generic/gemm_tcopy_2.c) + set(SBGEMMONCOPY ../generic/gemm_ncopy_2.c) + set(SBGEMMOTCOPY ../generic/gemm_tcopy_2.c) + set(SBGEMMINCOPYOBJ sbgemm_incopy.o) + set(SBGEMMITCOPYOBJ sbgemm_itcopy.o) + set(SBGEMMONCOPYOBJ sbgemm_oncopy.o) + set(SBGEMMOTCOPYOBJ sbgemm_otcopy.o) endif () endmacro () diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 3b2a9d6a2..f40304c09 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -16,8 +16,8 @@ # HAVE_SSE2 # HAVE_SSE3 # MAKE -# SHGEMM_UNROLL_M -# SHGEMM_UNROLL_N +# SBGEMM_UNROLL_M +# SBGEMM_UNROLL_N # SGEMM_UNROLL_M # SGEMM_UNROLL_N # DGEMM_UNROLL_M @@ -471,8 +471,8 @@ endif () set(ZGEMM_UNROLL_N 2) set(SYMV_P 8) endif() - set(SHGEMM_UNROLL_M 8) - set(SHGEMM_UNROLL_N 4) + set(SBGEMM_UNROLL_M 8) + set(SBGEMM_UNROLL_N 4) # Or should this actually be NUM_CORES? if (${NUM_THREADS} GREATER 0) diff --git a/cmake/system.cmake b/cmake/system.cmake index a504530fb..b34d4a9a5 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -70,6 +70,9 @@ if (DEFINED TARGET) set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") endif() endif() + if (DEFINED HAVE_SSE3) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") + endif() endif() if (DEFINED TARGET) @@ -323,7 +326,13 @@ else () set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_STACK_ALLOC=2048") endif () endif () - +if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows") +if (DEFINED BLAS3_MEM_ALLOC_THRESHOLD) +if (NOT ${BLAS3_MEM_ALLOC_THRESHOLD} EQUAL 32) +set(CCOMMON_OPT "${CCOMMON_OPT} -DBLAS3_MEM_ALLOC_THRESHOLD=${BLAS3_MEM_ALLOC_THRESHOLD}") +endif() +endif() +endif() if (DEFINED LIBNAMESUFFIX) set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}") else () @@ -401,20 +410,16 @@ if (NOT BUILD_SINGLE AND NOT BUILD_DOUBLE AND NOT BUILD_COMPLEX AND NOT BUILD_CO set (BUILD_COMPLEX16 ON) endif() if (BUILD_SINGLE) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_SINGLE=1") - set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_SINGLE=1") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_SINGLE") endif() if (BUILD_DOUBLE) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_DOUBLE=1") - set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_SINGLE=1") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_DOUBLE") endif() if (BUILD_COMPLEX) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX=1") - set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_COMPLEX=1") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX") endif() if (BUILD_COMPLEX16) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX16=1") - set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_COMPLEX16=1") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX16") endif() if(NOT MSVC) set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${CCOMMON_OPT}") @@ -588,8 +593,8 @@ endif () #export FUNCTION_PROFILE #export TARGET_CORE # -#export SHGEMM_UNROLL_M -#export SHGEMM_UNROLL_N +#export SBGEMM_UNROLL_M +#export SBGEMM_UNROLL_N #export SGEMM_UNROLL_M #export SGEMM_UNROLL_N #export DGEMM_UNROLL_M diff --git a/cmake/utils.cmake b/cmake/utils.cmake index 1c21e776e..8f25c1b27 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -211,7 +211,7 @@ function(GenerateNamedObjects sources_in) if (complex_only) list(REMOVE_ITEM float_list "SINGLE") list(REMOVE_ITEM float_list "DOUBLE") - list(REMOVE_ITEM float_list "HALF") + list(REMOVE_ITEM float_list "BFLOAT16") elseif (real_only) list(REMOVE_ITEM float_list "COMPLEX") list(REMOVE_ITEM float_list "ZCOMPLEX") @@ -225,8 +225,8 @@ function(GenerateNamedObjects sources_in) if (NOT no_float_type) string(SUBSTRING ${float_type} 0 1 float_char) string(TOLOWER ${float_char} float_char) - if (${float_type} STREQUAL "HALF") - set (float_char "sh") + if (${float_type} STREQUAL "BFLOAT16") + set (float_char "sb") endif () endif () @@ -262,8 +262,8 @@ function(GenerateNamedObjects sources_in) if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX") list(APPEND obj_defines "DOUBLE") endif () - if (${float_type} STREQUAL "HALF") - list(APPEND obj_defines "HALF") + if (${float_type} STREQUAL "BFLOAT16") + list(APPEND obj_defines "BFLOAT16") endif () if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") list(APPEND obj_defines "COMPLEX")