Change "HALF" and "sh" to "BFLOAT16" and "sb"

This commit is contained in:
Martin Kroeker 2020-10-12 00:11:31 +02:00 committed by GitHub
parent 7ae9e8960e
commit 2c552f1074
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 48 additions and 36 deletions

View File

@ -29,10 +29,8 @@ option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding proc
else()
set(NO_AFFINITY 1)
endif()
option(BUILD_SINGLE "Single precision" OFF)
option(BUILD_DOUBLE "Double precision" OFF)
option(BUILD_COMPLEX "Single precision" OFF)
option(BUILD_COMPLEX16 "Single precision" OFF)
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF)
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)
# Add a prefix or suffix to all exported symbol names in the shared library.
# Avoids conflicts with other BLAS libraries, especially when using
@ -91,13 +89,13 @@ if (NOT NO_LAPACK)
list(APPEND SUBDIRS lapack)
endif ()
if (NOT DEFINED BUILD_HALF)
set (BUILD_HALF false)
if (NOT DEFINED BUILD_BFLOAT16)
set (BUILD_BFLOAT16 false)
endif ()
# set which float types we want to build for
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
# if none are defined, build for all
# set(BUILD_HALF true)
# set(BUILD_BFLOAT16 true)
set(BUILD_SINGLE true)
set(BUILD_DOUBLE true)
set(BUILD_COMPLEX true)
@ -110,33 +108,28 @@ endif()
set(FLOAT_TYPES "")
if (BUILD_SINGLE)
message(STATUS "Building Songle Precision")
list(APPEND FLOAT_TYPES "SINGLE")
# set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_SINGLE=1")
message(STATUS "Building Single Precision")
list(APPEND FLOAT_TYPES "SINGLE") # defines nothing
endif ()
if (BUILD_DOUBLE)
message(STATUS "Building Double Precision")
list(APPEND FLOAT_TYPES "DOUBLE")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_DOUBLE=1")
list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE
endif ()
if (BUILD_COMPLEX)
message(STATUS "Building Complex Precision")
list(APPEND FLOAT_TYPES "COMPLEX")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX=1")
endif ()
list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX
endif ()
if (BUILD_COMPLEX16)
message(STATUS "Building Double Complex Precision")
list(APPEND FLOAT_TYPES "ZCOMPLEX")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX16=1")
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
endif ()
if (BUILD_HALF)
if (BUILD_BFLOAT16)
message(STATUS "Building Half Precision")
list(APPEND FLOAT_TYPES "HALF")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_HALF")
list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing
endif ()
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
@ -243,6 +236,9 @@ if (NOT MSVC AND NOT NOFORTRAN)
add_subdirectory(ctest)
endif()
add_subdirectory(lapack-netlib/TESTING)
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
add_subdirectory(cpp_thread_test)
endif()
endif()
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES

View File

@ -272,17 +272,33 @@ COMMON_PROF = -pg
# work at all.
#
# CPP_THREAD_SAFETY_TEST = 1
#
# use this to run only the less memory-hungry GEMV test
# CPP_THREAD_SAFETY_GEMV = 1
# If you want to enable the experimental BFLOAT16 support
# BUILD_HALF = 1
#
# Select if you need to build only select types
# BUILD_SINGLE = 1
# BUILD_DOUBLE = 1
# BUILD_COMPLEX = 1
# BUILD_COMPLEX16 = 1
#
#
# BUILD_BFLOAT16 = 1
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
# will be allocated on the heap rather than the stack. (This array alone requires
# NUM_THREADS*NUM_THREADS*128 bytes of memory, so it should not pose a problem at low CPU
# counts, but obviously it is not the only item that ends up on the stack.)
# The default value of 32 ensures that the overall requirement is compatible
# with the default 1MB stacksize imposed by having the Java VM loaded without use
# of its -Xss parameter.
# The value of 160, formerly used from about version 0.2.7 until 0.3.10, is easily compatible
# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the Java
# VM, e.g. in Octave or with the Java-based libhdfs in numpy or scipy code.
# BLAS3_MEM_ALLOC_THRESHOLD = 160
# the below is not yet configurable, use cmake if you need to build only select types
BUILD_SINGLE = 1
BUILD_DOUBLE = 1
BUILD_COMPLEX = 1
BUILD_COMPLEX16 = 1
# End of user configuration
#

View File

@ -1232,8 +1232,8 @@ ifeq ($(USE_TLS), 1)
CCOMMON_OPT += -DUSE_TLS
endif
ifeq ($(BUILD_HALF), 1)
CCOMMON_OPT += -DBUILD_HALF
ifeq ($(BUILD_BFLOAT16), 1)
CCOMMON_OPT += -DBUILD_BFLOAT16
endif
ifeq ($(BUILD_SINGLE), 1)
CCOMMON_OPT += -DBUILD_SINGLE=1
@ -1521,10 +1521,10 @@ export KERNELDIR
export FUNCTION_PROFILE
export TARGET_CORE
export NO_AVX512
export BUILD_HALF
export BUILD_BFLOAT16
export SHGEMM_UNROLL_M
export SHGEMM_UNROLL_N
export SBGEMM_UNROLL_M
export SBGEMM_UNROLL_N
export SGEMM_UNROLL_M
export SGEMM_UNROLL_N
export DGEMM_UNROLL_M

View File

@ -24,14 +24,14 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
endif
$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX
$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
$(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX
$(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
$(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)