Change "HALF" and "sh" to "BFLOAT16" and "sb"
This commit is contained in:
parent
7ae9e8960e
commit
2c552f1074
|
@ -29,10 +29,8 @@ option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding proc
|
|||
else()
|
||||
set(NO_AFFINITY 1)
|
||||
endif()
|
||||
option(BUILD_SINGLE "Single precision" OFF)
|
||||
option(BUILD_DOUBLE "Double precision" OFF)
|
||||
option(BUILD_COMPLEX "Single precision" OFF)
|
||||
option(BUILD_COMPLEX16 "Single precision" OFF)
|
||||
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF)
|
||||
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoids conflicts with other BLAS libraries, especially when using
|
||||
|
@ -91,13 +89,13 @@ if (NOT NO_LAPACK)
|
|||
list(APPEND SUBDIRS lapack)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED BUILD_HALF)
|
||||
set (BUILD_HALF false)
|
||||
if (NOT DEFINED BUILD_BFLOAT16)
|
||||
set (BUILD_BFLOAT16 false)
|
||||
endif ()
|
||||
# set which float types we want to build for
|
||||
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
|
||||
# if none are defined, build for all
|
||||
# set(BUILD_HALF true)
|
||||
# set(BUILD_BFLOAT16 true)
|
||||
set(BUILD_SINGLE true)
|
||||
set(BUILD_DOUBLE true)
|
||||
set(BUILD_COMPLEX true)
|
||||
|
@ -110,33 +108,28 @@ endif()
|
|||
|
||||
set(FLOAT_TYPES "")
|
||||
if (BUILD_SINGLE)
|
||||
message(STATUS "Building Songle Precision")
|
||||
list(APPEND FLOAT_TYPES "SINGLE")
|
||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_SINGLE=1")
|
||||
message(STATUS "Building Single Precision")
|
||||
list(APPEND FLOAT_TYPES "SINGLE") # defines nothing
|
||||
endif ()
|
||||
|
||||
if (BUILD_DOUBLE)
|
||||
message(STATUS "Building Double Precision")
|
||||
list(APPEND FLOAT_TYPES "DOUBLE")
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_DOUBLE=1")
|
||||
list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE
|
||||
endif ()
|
||||
|
||||
if (BUILD_COMPLEX)
|
||||
message(STATUS "Building Complex Precision")
|
||||
list(APPEND FLOAT_TYPES "COMPLEX")
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX=1")
|
||||
endif ()
|
||||
list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX
|
||||
endif ()
|
||||
|
||||
if (BUILD_COMPLEX16)
|
||||
message(STATUS "Building Double Complex Precision")
|
||||
list(APPEND FLOAT_TYPES "ZCOMPLEX")
|
||||
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX16=1")
|
||||
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
|
||||
endif ()
|
||||
|
||||
if (BUILD_HALF)
|
||||
if (BUILD_BFLOAT16)
|
||||
message(STATUS "Building Half Precision")
|
||||
list(APPEND FLOAT_TYPES "HALF")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_HALF")
|
||||
list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
|
||||
|
@ -243,6 +236,9 @@ if (NOT MSVC AND NOT NOFORTRAN)
|
|||
add_subdirectory(ctest)
|
||||
endif()
|
||||
add_subdirectory(lapack-netlib/TESTING)
|
||||
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
|
||||
add_subdirectory(cpp_thread_test)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
|
||||
|
|
|
@ -272,17 +272,33 @@ COMMON_PROF = -pg
|
|||
# work at all.
|
||||
#
|
||||
# CPP_THREAD_SAFETY_TEST = 1
|
||||
#
|
||||
# use this to run only the less memory-hungry GEMV test
|
||||
# CPP_THREAD_SAFETY_GEMV = 1
|
||||
|
||||
|
||||
# If you want to enable the experimental BFLOAT16 support
|
||||
# BUILD_HALF = 1
|
||||
#
|
||||
# Select if you need to build only select types
|
||||
# BUILD_SINGLE = 1
|
||||
# BUILD_DOUBLE = 1
|
||||
# BUILD_COMPLEX = 1
|
||||
# BUILD_COMPLEX16 = 1
|
||||
#
|
||||
#
|
||||
# BUILD_BFLOAT16 = 1
|
||||
|
||||
|
||||
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
|
||||
# will be allocated on the heap rather than the stack. (This array alone requires
|
||||
# NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu
|
||||
# counts, but obviously it is not the only item that ends up on the stack.
|
||||
# The default value of 32 ensures that the overall requirement is compatible
|
||||
# with the default 1MB stacksize imposed by having the Java VM loaded without use
|
||||
# of its -Xss parameter.
|
||||
# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible
|
||||
# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the java
|
||||
# VM e.g. in Octave or with the java-based libhdfs in numpy or scipy code
|
||||
# BLAS3_MEM_ALLOC_THRESHOLD = 160
|
||||
|
||||
|
||||
|
||||
# the below is not yet configurable, use cmake if you need to build only select types
|
||||
BUILD_SINGLE = 1
|
||||
BUILD_DOUBLE = 1
|
||||
BUILD_COMPLEX = 1
|
||||
BUILD_COMPLEX16 = 1
|
||||
# End of user configuration
|
||||
#
|
||||
|
|
|
@ -1232,8 +1232,8 @@ ifeq ($(USE_TLS), 1)
|
|||
CCOMMON_OPT += -DUSE_TLS
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_HALF), 1)
|
||||
CCOMMON_OPT += -DBUILD_HALF
|
||||
ifeq ($(BUILD_BFLOAT16), 1)
|
||||
CCOMMON_OPT += -DBUILD_BFLOAT16
|
||||
endif
|
||||
ifeq ($(BUILD_SINGLE), 1)
|
||||
CCOMMON_OPT += -DBUILD_SINGLE=1
|
||||
|
@ -1521,10 +1521,10 @@ export KERNELDIR
|
|||
export FUNCTION_PROFILE
|
||||
export TARGET_CORE
|
||||
export NO_AVX512
|
||||
export BUILD_HALF
|
||||
export BUILD_BFLOAT16
|
||||
|
||||
export SHGEMM_UNROLL_M
|
||||
export SHGEMM_UNROLL_N
|
||||
export SBGEMM_UNROLL_M
|
||||
export SBGEMM_UNROLL_N
|
||||
export SGEMM_UNROLL_M
|
||||
export SGEMM_UNROLL_N
|
||||
export DGEMM_UNROLL_M
|
||||
|
|
|
@ -24,14 +24,14 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
|||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
||||
endif
|
||||
|
||||
$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX
|
||||
$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
|
||||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
||||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
||||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
|
||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
||||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||
$(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX
|
||||
$(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
|
||||
|
||||
$(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
|
|
Loading…
Reference in New Issue