Change BFLOAT16 data type/API support naming
The existing BFLOAT16 support is exposed under the name HALF and the prefix sh, which is misleading because half precision usually refers to FP16 according to the IEEE 754 definition. This type should be identified as BFLOAT16 instead of HALF. Following the discussion in issue #2767, update the related naming and prefix to BFLOAT16 and sb. Details: 1. Change the generic control flag from BUILD_HALF to BUILD_BFLOAT16; 2. Change the prefix of the BFLOAT16 API from shxxxx to sbxxxx; 3. Rename the related files and any other identifiers that use the above prefix accordingly.
This commit is contained in:
parent
4573cb2f43
commit
93e748d67a
|
@ -87,13 +87,13 @@ if (NOT NO_LAPACK)
|
||||||
list(APPEND SUBDIRS lapack)
|
list(APPEND SUBDIRS lapack)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (NOT DEFINED BUILD_HALF)
|
if (NOT DEFINED BUILD_BFLOAT16)
|
||||||
set (BUILD_HALF false)
|
set (BUILD_BFLOAT16 false)
|
||||||
endif ()
|
endif ()
|
||||||
# set which float types we want to build for
|
# set which float types we want to build for
|
||||||
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
|
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
|
||||||
# if none are defined, build for all
|
# if none are defined, build for all
|
||||||
# set(BUILD_HALF true)
|
# set(BUILD_BFLOAT16 true)
|
||||||
set(BUILD_SINGLE true)
|
set(BUILD_SINGLE true)
|
||||||
set(BUILD_DOUBLE true)
|
set(BUILD_DOUBLE true)
|
||||||
set(BUILD_COMPLEX true)
|
set(BUILD_COMPLEX true)
|
||||||
|
@ -125,9 +125,9 @@ if (BUILD_COMPLEX16)
|
||||||
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
|
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (BUILD_HALF)
|
if (BUILD_BFLOAT16)
|
||||||
message(STATUS "Building Half Precision")
|
message(STATUS "Building BFloat16 Precision")
|
||||||
list(APPEND FLOAT_TYPES "HALF") # defines nothing
|
list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
|
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
|
||||||
|
|
|
@ -275,7 +275,7 @@ COMMON_PROF = -pg
|
||||||
|
|
||||||
|
|
||||||
# If you want to enable the experimental BFLOAT16 support
|
# If you want to enable the experimental BFLOAT16 support
|
||||||
# BUILD_HALF = 1
|
# BUILD_BFLOAT16 = 1
|
||||||
#
|
#
|
||||||
# End of user configuration
|
# End of user configuration
|
||||||
#
|
#
|
||||||
|
|
|
@ -1209,8 +1209,8 @@ ifeq ($(USE_TLS), 1)
|
||||||
CCOMMON_OPT += -DUSE_TLS
|
CCOMMON_OPT += -DUSE_TLS
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF), 1)
|
ifeq ($(BUILD_BFLOAT16), 1)
|
||||||
CCOMMON_OPT += -DBUILD_HALF
|
CCOMMON_OPT += -DBUILD_BFLOAT16
|
||||||
endif
|
endif
|
||||||
|
|
||||||
CCOMMON_OPT += -DVERSION=\"$(VERSION)\"
|
CCOMMON_OPT += -DVERSION=\"$(VERSION)\"
|
||||||
|
@ -1486,10 +1486,10 @@ export KERNELDIR
|
||||||
export FUNCTION_PROFILE
|
export FUNCTION_PROFILE
|
||||||
export TARGET_CORE
|
export TARGET_CORE
|
||||||
export NO_AVX512
|
export NO_AVX512
|
||||||
export BUILD_HALF
|
export BUILD_BFLOAT16
|
||||||
|
|
||||||
export SHGEMM_UNROLL_M
|
export SBGEMM_UNROLL_M
|
||||||
export SHGEMM_UNROLL_N
|
export SBGEMM_UNROLL_N
|
||||||
export SGEMM_UNROLL_M
|
export SGEMM_UNROLL_M
|
||||||
export SGEMM_UNROLL_N
|
export SGEMM_UNROLL_N
|
||||||
export DGEMM_UNROLL_M
|
export DGEMM_UNROLL_M
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
SHBLASOBJS_P = $(SHBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
SBBLASOBJS_P = $(SBBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
DBLASOBJS_P = $(DBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
DBLASOBJS_P = $(DBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
|
@ -10,8 +10,8 @@ COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
|
|
||||||
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
|
|
||||||
BLASOBJS = $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
|
BLASOBJS = $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
|
||||||
BLASOBJS_P = $(SHBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P)
|
BLASOBJS_P = $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P)
|
||||||
|
|
||||||
ifdef EXPRECISION
|
ifdef EXPRECISION
|
||||||
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||||
|
@ -23,7 +23,7 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX
|
$(SBBLASOBJS) $(SBBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
|
||||||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
||||||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
||||||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||||
|
@ -31,7 +31,7 @@ $(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
|
||||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
||||||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||||
|
|
||||||
$(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
$(SBBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||||
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||||
$(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
$(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||||
|
|
|
@ -49,10 +49,10 @@ else
|
||||||
GOTO_LAPACK_TARGETS=
|
GOTO_LAPACK_TARGETS=
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
GOTO_HALF_TARGETS=shgemm.goto
|
GOTO_BFLOAT16_TARGETS=sbgemm.goto
|
||||||
else
|
else
|
||||||
GOTO_HALF_TARGETS=
|
GOTO_BFLOAT16_TARGETS=
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), WINNT)
|
ifeq ($(OSNAME), WINNT)
|
||||||
|
@ -97,7 +97,7 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
||||||
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
|
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
|
||||||
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \
|
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \
|
||||||
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
|
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
|
||||||
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto $(GOTO_HALF_TARGETS)
|
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto $(GOTO_BFLOAT16_TARGETS)
|
||||||
|
|
||||||
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
||||||
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
|
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
|
||||||
|
@ -270,7 +270,7 @@ goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
||||||
samin.goto damin.goto camin.goto zamin.goto \
|
samin.goto damin.goto camin.goto zamin.goto \
|
||||||
smin.goto dmin.goto \
|
smin.goto dmin.goto \
|
||||||
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto \
|
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto \
|
||||||
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS) $(GOTO_HALF_TARGETS)
|
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS) $(GOTO_BFLOAT16_TARGETS)
|
||||||
|
|
||||||
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
||||||
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
|
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
|
||||||
|
@ -620,8 +620,8 @@ zcholesky.essl : zcholesky.$(SUFFIX)
|
||||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||||
|
|
||||||
##################################### Sgemm ####################################################
|
##################################### Sgemm ####################################################
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
shgemm.goto : shgemm.$(SUFFIX) ../$(LIBNAME)
|
sbgemm.goto : sbgemm.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -2927,9 +2927,9 @@ ccholesky.$(SUFFIX) : cholesky.c
|
||||||
zcholesky.$(SUFFIX) : cholesky.c
|
zcholesky.$(SUFFIX) : cholesky.c
|
||||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
shgemm.$(SUFFIX) : gemm.c
|
sbgemm.$(SUFFIX) : gemm.c
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UCOMPLEX -UDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -DBFLOAT16 -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||||
endif
|
endif
|
||||||
|
|
||||||
sgemm.$(SUFFIX) : gemm.c
|
sgemm.$(SUFFIX) : gemm.c
|
||||||
|
|
|
@ -39,8 +39,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define GEMM BLASFUNC(dgemm)
|
#define GEMM BLASFUNC(dgemm)
|
||||||
#elif defined(HALF)
|
#elif defined(BFLOAT16)
|
||||||
#define GEMM BLASFUNC(shgemm)
|
#define GEMM BLASFUNC(sbgemm)
|
||||||
#else
|
#else
|
||||||
#define GEMM BLASFUNC(sgemm)
|
#define GEMM BLASFUNC(sgemm)
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -113,7 +113,7 @@ macro(SetDefaultL1)
|
||||||
set(ZSUMKERNEL zsum.S)
|
set(ZSUMKERNEL zsum.S)
|
||||||
set(QSUMKERNEL sum.S)
|
set(QSUMKERNEL sum.S)
|
||||||
set(XSUMKERNEL zsum.S)
|
set(XSUMKERNEL zsum.S)
|
||||||
if (BUILD_HALF)
|
if (BUILD_BFLOAT16)
|
||||||
set(SHAMINKERNEL ../arm/amin.c)
|
set(SHAMINKERNEL ../arm/amin.c)
|
||||||
set(SHAMAXKERNEL ../arm/amax.c)
|
set(SHAMAXKERNEL ../arm/amax.c)
|
||||||
set(SHMAXKERNEL ../arm/max.c)
|
set(SHMAXKERNEL ../arm/max.c)
|
||||||
|
@ -181,7 +181,7 @@ macro(SetDefaultL2)
|
||||||
set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
|
set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
|
||||||
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
|
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
|
||||||
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
|
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
|
||||||
if (BUILD_HALF)
|
if (BUILD_BFLOAT16)
|
||||||
set(SHGEMVNKERNEL ../arm/gemv_n.c)
|
set(SHGEMVNKERNEL ../arm/gemv_n.c)
|
||||||
set(SHGEMVTKERNEL ../arm/gemv_t.c)
|
set(SHGEMVTKERNEL ../arm/gemv_t.c)
|
||||||
set(SHGERKERNEL ../generic/ger.c)
|
set(SHGERKERNEL ../generic/ger.c)
|
||||||
|
@ -193,18 +193,18 @@ macro(SetDefaultL3)
|
||||||
set(DGEADD_KERNEL ../generic/geadd.c)
|
set(DGEADD_KERNEL ../generic/geadd.c)
|
||||||
set(CGEADD_KERNEL ../generic/zgeadd.c)
|
set(CGEADD_KERNEL ../generic/zgeadd.c)
|
||||||
set(ZGEADD_KERNEL ../generic/zgeadd.c)
|
set(ZGEADD_KERNEL ../generic/zgeadd.c)
|
||||||
if (BUILD_HALF)
|
if (BUILD_BFLOAT16)
|
||||||
set(SHGEADD_KERNEL ../generic/geadd.c)
|
set(SHGEADD_KERNEL ../generic/geadd.c)
|
||||||
set(SHGEMMKERNEL ../generic/gemmkernel_2x2.c)
|
set(SBGEMMKERNEL ../generic/gemmkernel_2x2.c)
|
||||||
set(SHGEMM_BETA ../generic/gemm_beta.c)
|
set(SBGEMM_BETA ../generic/gemm_beta.c)
|
||||||
set(SHGEMMINCOPY ../generic/gemm_ncopy_2.c)
|
set(SBGEMMINCOPY ../generic/gemm_ncopy_2.c)
|
||||||
set(SHGEMMITCOPY ../generic/gemm_tcopy_2.c)
|
set(SBGEMMITCOPY ../generic/gemm_tcopy_2.c)
|
||||||
set(SHGEMMONCOPY ../generic/gemm_ncopy_2.c)
|
set(SBGEMMONCOPY ../generic/gemm_ncopy_2.c)
|
||||||
set(SHGEMMOTCOPY ../generic/gemm_tcopy_2.c)
|
set(SBGEMMOTCOPY ../generic/gemm_tcopy_2.c)
|
||||||
set(SHGEMMINCOPYOBJ shgemm_incopy.o)
|
set(SBGEMMINCOPYOBJ sbgemm_incopy.o)
|
||||||
set(SHGEMMITCOPYOBJ shgemm_itcopy.o)
|
set(SBGEMMITCOPYOBJ sbgemm_itcopy.o)
|
||||||
set(SHGEMMONCOPYOBJ shgemm_oncopy.o)
|
set(SBGEMMONCOPYOBJ sbgemm_oncopy.o)
|
||||||
set(SHGEMMOTCOPYOBJ shgemm_otcopy.o)
|
set(SBGEMMOTCOPYOBJ sbgemm_otcopy.o)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
endmacro ()
|
endmacro ()
|
||||||
|
|
|
@ -16,8 +16,8 @@
|
||||||
# HAVE_SSE2
|
# HAVE_SSE2
|
||||||
# HAVE_SSE3
|
# HAVE_SSE3
|
||||||
# MAKE
|
# MAKE
|
||||||
# SHGEMM_UNROLL_M
|
# SBGEMM_UNROLL_M
|
||||||
# SHGEMM_UNROLL_N
|
# SBGEMM_UNROLL_N
|
||||||
# SGEMM_UNROLL_M
|
# SGEMM_UNROLL_M
|
||||||
# SGEMM_UNROLL_N
|
# SGEMM_UNROLL_N
|
||||||
# DGEMM_UNROLL_M
|
# DGEMM_UNROLL_M
|
||||||
|
@ -471,8 +471,8 @@ endif ()
|
||||||
set(ZGEMM_UNROLL_N 2)
|
set(ZGEMM_UNROLL_N 2)
|
||||||
set(SYMV_P 8)
|
set(SYMV_P 8)
|
||||||
endif()
|
endif()
|
||||||
set(SHGEMM_UNROLL_M 8)
|
set(SBGEMM_UNROLL_M 8)
|
||||||
set(SHGEMM_UNROLL_N 4)
|
set(SBGEMM_UNROLL_N 4)
|
||||||
|
|
||||||
# Or should this actually be NUM_CORES?
|
# Or should this actually be NUM_CORES?
|
||||||
if (${NUM_THREADS} GREATER 0)
|
if (${NUM_THREADS} GREATER 0)
|
||||||
|
|
|
@ -548,8 +548,8 @@ endif ()
|
||||||
#export FUNCTION_PROFILE
|
#export FUNCTION_PROFILE
|
||||||
#export TARGET_CORE
|
#export TARGET_CORE
|
||||||
#
|
#
|
||||||
#export SHGEMM_UNROLL_M
|
#export SBGEMM_UNROLL_M
|
||||||
#export SHGEMM_UNROLL_N
|
#export SBGEMM_UNROLL_N
|
||||||
#export SGEMM_UNROLL_M
|
#export SGEMM_UNROLL_M
|
||||||
#export SGEMM_UNROLL_N
|
#export SGEMM_UNROLL_N
|
||||||
#export DGEMM_UNROLL_M
|
#export DGEMM_UNROLL_M
|
||||||
|
|
|
@ -211,7 +211,7 @@ function(GenerateNamedObjects sources_in)
|
||||||
if (complex_only)
|
if (complex_only)
|
||||||
list(REMOVE_ITEM float_list "SINGLE")
|
list(REMOVE_ITEM float_list "SINGLE")
|
||||||
list(REMOVE_ITEM float_list "DOUBLE")
|
list(REMOVE_ITEM float_list "DOUBLE")
|
||||||
list(REMOVE_ITEM float_list "HALF")
|
list(REMOVE_ITEM float_list "BFLOAT16")
|
||||||
elseif (real_only)
|
elseif (real_only)
|
||||||
list(REMOVE_ITEM float_list "COMPLEX")
|
list(REMOVE_ITEM float_list "COMPLEX")
|
||||||
list(REMOVE_ITEM float_list "ZCOMPLEX")
|
list(REMOVE_ITEM float_list "ZCOMPLEX")
|
||||||
|
@ -225,8 +225,8 @@ function(GenerateNamedObjects sources_in)
|
||||||
if (NOT no_float_type)
|
if (NOT no_float_type)
|
||||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||||
string(TOLOWER ${float_char} float_char)
|
string(TOLOWER ${float_char} float_char)
|
||||||
if (${float_type} STREQUAL "HALF")
|
if (${float_type} STREQUAL "BFLOAT16")
|
||||||
set (float_char "sh")
|
set (float_char "sb")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
@ -262,8 +262,8 @@ function(GenerateNamedObjects sources_in)
|
||||||
if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX")
|
if (${float_type} STREQUAL "DOUBLE" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||||
list(APPEND obj_defines "DOUBLE")
|
list(APPEND obj_defines "DOUBLE")
|
||||||
endif ()
|
endif ()
|
||||||
if (${float_type} STREQUAL "HALF")
|
if (${float_type} STREQUAL "BFLOAT16")
|
||||||
list(APPEND obj_defines "HALF")
|
list(APPEND obj_defines "BFLOAT16")
|
||||||
endif ()
|
endif ()
|
||||||
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||||
list(APPEND obj_defines "COMPLEX")
|
list(APPEND obj_defines "COMPLEX")
|
||||||
|
|
6
common.h
6
common.h
|
@ -257,9 +257,9 @@ typedef long BLASLONG;
|
||||||
typedef unsigned long BLASULONG;
|
typedef unsigned long BLASULONG;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef BFLOAT16
|
#ifndef BUILD_IN_BFLOAT16
|
||||||
typedef unsigned short bfloat16;
|
typedef unsigned short bfloat16;
|
||||||
#define HALFCONVERSION 1
|
#define BFLOAT16CONVERSION 1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef USE64BITINT
|
#ifdef USE64BITINT
|
||||||
|
@ -302,7 +302,7 @@ typedef int blasint;
|
||||||
#define SIZE 8
|
#define SIZE 8
|
||||||
#define BASE_SHIFT 3
|
#define BASE_SHIFT 3
|
||||||
#define ZBASE_SHIFT 4
|
#define ZBASE_SHIFT 4
|
||||||
#elif defined(HALF)
|
#elif defined(BFLOAT16)
|
||||||
#define IFLOAT bfloat16
|
#define IFLOAT bfloat16
|
||||||
#define XFLOAT IFLOAT
|
#define XFLOAT IFLOAT
|
||||||
#define FLOAT float
|
#define FLOAT float
|
||||||
|
|
|
@ -469,7 +469,7 @@ void BLASFUNC(xhbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint
|
||||||
|
|
||||||
/* Level 3 routines */
|
/* Level 3 routines */
|
||||||
|
|
||||||
void BLASFUNC(shgemm)(char *, char *, blasint *, blasint *, blasint *, float *,
|
void BLASFUNC(sbgemm)(char *, char *, blasint *, blasint *, blasint *, float *,
|
||||||
bfloat16 *, blasint *, bfloat16 *, blasint *, float *, float *, blasint *);
|
bfloat16 *, blasint *, bfloat16 *, blasint *, float *, float *, blasint *);
|
||||||
void BLASFUNC(sgemm)(char *, char *, blasint *, blasint *, blasint *, float *,
|
void BLASFUNC(sgemm)(char *, char *, blasint *, blasint *, blasint *, float *,
|
||||||
float *, blasint *, float *, blasint *, float *, float *, blasint *);
|
float *, blasint *, float *, blasint *, float *, float *, blasint *);
|
||||||
|
|
|
@ -55,7 +55,7 @@ extern void sgemm_kernel_direct(BLASLONG M, BLASLONG N, BLASLONG K,
|
||||||
extern int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
|
extern int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
|
||||||
|
|
||||||
|
|
||||||
int shgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
|
int sbgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
|
||||||
bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
|
bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
|
||||||
int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
|
int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
|
||||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||||
|
@ -78,10 +78,10 @@ int xgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble *,
|
||||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int shgemm_incopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
int sbgemm_incopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
||||||
int shgemm_itcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
int sbgemm_itcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
||||||
int shgemm_oncopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
int sbgemm_oncopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
||||||
int shgemm_otcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
int sbgemm_otcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
||||||
int sgemm_incopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
|
int sgemm_incopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
|
||||||
int sgemm_itcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
|
int sgemm_itcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
|
||||||
int sgemm_oncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
|
int sgemm_oncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
|
||||||
|
@ -505,7 +505,7 @@ int xher2k_kernel_UC(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdoubl
|
||||||
int xher2k_kernel_LN(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
|
int xher2k_kernel_LN(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
|
||||||
int xher2k_kernel_LC(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
|
int xher2k_kernel_LC(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
|
||||||
|
|
||||||
int shgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
|
int sbgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
|
||||||
int sgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG);
|
int sgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG);
|
||||||
int dgemm_kernel(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
|
int dgemm_kernel(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
|
||||||
|
|
||||||
|
@ -534,10 +534,10 @@ int cgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float
|
||||||
int zgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
|
int zgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
|
||||||
int xgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
|
int xgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
|
||||||
|
|
||||||
int shgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
int sbgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||||
int shgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
int sbgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||||
int shgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
int sbgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||||
int shgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
int sbgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||||
|
|
||||||
int sgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
int sgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||||
int sgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
int sgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||||
|
@ -631,10 +631,10 @@ int xgemm_cr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLON
|
||||||
int xgemm_cc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
int xgemm_cc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int shgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
int sbgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||||
int shgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
int sbgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||||
int shgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
int sbgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||||
int shgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
int sbgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||||
|
|
||||||
int sgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
int sgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||||
int sgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
int sgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||||
|
|
|
@ -39,7 +39,7 @@
|
||||||
#ifndef COMMON_MACRO
|
#ifndef COMMON_MACRO
|
||||||
#define COMMON_MACRO
|
#define COMMON_MACRO
|
||||||
|
|
||||||
#include "common_sh.h"
|
#include "common_sb.h"
|
||||||
#include "common_s.h"
|
#include "common_s.h"
|
||||||
#include "common_d.h"
|
#include "common_d.h"
|
||||||
#include "common_q.h"
|
#include "common_q.h"
|
||||||
|
@ -644,7 +644,7 @@
|
||||||
|
|
||||||
#define GEADD_K DGEADD_K
|
#define GEADD_K DGEADD_K
|
||||||
|
|
||||||
#elif defined(HALF)
|
#elif defined(BFLOAT16)
|
||||||
|
|
||||||
#define AMAX_K SAMAX_K
|
#define AMAX_K SAMAX_K
|
||||||
#define AMIN_K SAMIN_K
|
#define AMIN_K SAMIN_K
|
||||||
|
@ -676,32 +676,32 @@
|
||||||
#define NRM2_K SNRM2_K
|
#define NRM2_K SNRM2_K
|
||||||
#define SYMV_THREAD_U SSYMV_THREAD_U
|
#define SYMV_THREAD_U SSYMV_THREAD_U
|
||||||
#define SYMV_THREAD_L SSYMV_THREAD_L
|
#define SYMV_THREAD_L SSYMV_THREAD_L
|
||||||
#define GEMM_BETA SHGEMM_BETA
|
#define GEMM_BETA SBGEMM_BETA
|
||||||
#define GEMM_KERNEL_N SHGEMM_KERNEL
|
#define GEMM_KERNEL_N SBGEMM_KERNEL
|
||||||
#define GEMM_KERNEL_L SHGEMM_KERNEL
|
#define GEMM_KERNEL_L SBGEMM_KERNEL
|
||||||
#define GEMM_KERNEL_R SHGEMM_KERNEL
|
#define GEMM_KERNEL_R SBGEMM_KERNEL
|
||||||
#define GEMM_KERNEL_B SHGEMM_KERNEL
|
#define GEMM_KERNEL_B SBGEMM_KERNEL
|
||||||
|
|
||||||
#define GEMM_NN SHGEMM_NN
|
#define GEMM_NN SBGEMM_NN
|
||||||
#define GEMM_CN SHGEMM_TN
|
#define GEMM_CN SBGEMM_TN
|
||||||
#define GEMM_TN SHGEMM_TN
|
#define GEMM_TN SBGEMM_TN
|
||||||
#define GEMM_NC SHGEMM_NT
|
#define GEMM_NC SBGEMM_NT
|
||||||
#define GEMM_NT SHGEMM_NT
|
#define GEMM_NT SBGEMM_NT
|
||||||
#define GEMM_CC SHGEMM_TT
|
#define GEMM_CC SBGEMM_TT
|
||||||
#define GEMM_CT SHGEMM_TT
|
#define GEMM_CT SBGEMM_TT
|
||||||
#define GEMM_TC SHGEMM_TT
|
#define GEMM_TC SBGEMM_TT
|
||||||
#define GEMM_TT SHGEMM_TT
|
#define GEMM_TT SBGEMM_TT
|
||||||
#define GEMM_NR SHGEMM_NN
|
#define GEMM_NR SBGEMM_NN
|
||||||
#define GEMM_TR SHGEMM_TN
|
#define GEMM_TR SBGEMM_TN
|
||||||
#define GEMM_CR SHGEMM_TN
|
#define GEMM_CR SBGEMM_TN
|
||||||
#define GEMM_RN SHGEMM_NN
|
#define GEMM_RN SBGEMM_NN
|
||||||
#define GEMM_RT SHGEMM_NT
|
#define GEMM_RT SBGEMM_NT
|
||||||
#define GEMM_RC SHGEMM_NT
|
#define GEMM_RC SBGEMM_NT
|
||||||
#define GEMM_RR SHGEMM_NN
|
#define GEMM_RR SBGEMM_NN
|
||||||
#define GEMM_ONCOPY SHGEMM_ONCOPY
|
#define GEMM_ONCOPY SBGEMM_ONCOPY
|
||||||
#define GEMM_OTCOPY SHGEMM_OTCOPY
|
#define GEMM_OTCOPY SBGEMM_OTCOPY
|
||||||
#define GEMM_INCOPY SHGEMM_INCOPY
|
#define GEMM_INCOPY SBGEMM_INCOPY
|
||||||
#define GEMM_ITCOPY SHGEMM_ITCOPY
|
#define GEMM_ITCOPY SBGEMM_ITCOPY
|
||||||
#define SYMM_THREAD_LU SSYMM_THREAD_LU
|
#define SYMM_THREAD_LU SSYMM_THREAD_LU
|
||||||
#define SYMM_THREAD_LL SSYMM_THREAD_LL
|
#define SYMM_THREAD_LL SSYMM_THREAD_LL
|
||||||
#define SYMM_THREAD_RU SSYMM_THREAD_RU
|
#define SYMM_THREAD_RU SSYMM_THREAD_RU
|
||||||
|
@ -717,22 +717,22 @@
|
||||||
#define HEMM_THREAD_RU SHEMM_THREAD_RU
|
#define HEMM_THREAD_RU SHEMM_THREAD_RU
|
||||||
#define HEMM_THREAD_RL SHEMM_THREAD_RL
|
#define HEMM_THREAD_RL SHEMM_THREAD_RL
|
||||||
|
|
||||||
#define GEMM_THREAD_NN SHGEMM_THREAD_NN
|
#define GEMM_THREAD_NN SBGEMM_THREAD_NN
|
||||||
#define GEMM_THREAD_CN SHGEMM_THREAD_TN
|
#define GEMM_THREAD_CN SBGEMM_THREAD_TN
|
||||||
#define GEMM_THREAD_TN SHGEMM_THREAD_TN
|
#define GEMM_THREAD_TN SBGEMM_THREAD_TN
|
||||||
#define GEMM_THREAD_NC SHGEMM_THREAD_NT
|
#define GEMM_THREAD_NC SBGEMM_THREAD_NT
|
||||||
#define GEMM_THREAD_NT SHGEMM_THREAD_NT
|
#define GEMM_THREAD_NT SBGEMM_THREAD_NT
|
||||||
#define GEMM_THREAD_CC SHGEMM_THREAD_TT
|
#define GEMM_THREAD_CC SBGEMM_THREAD_TT
|
||||||
#define GEMM_THREAD_CT SHGEMM_THREAD_TT
|
#define GEMM_THREAD_CT SBGEMM_THREAD_TT
|
||||||
#define GEMM_THREAD_TC SHGEMM_THREAD_TT
|
#define GEMM_THREAD_TC SBGEMM_THREAD_TT
|
||||||
#define GEMM_THREAD_TT SHGEMM_THREAD_TT
|
#define GEMM_THREAD_TT SBGEMM_THREAD_TT
|
||||||
#define GEMM_THREAD_NR SHGEMM_THREAD_NN
|
#define GEMM_THREAD_NR SBGEMM_THREAD_NN
|
||||||
#define GEMM_THREAD_TR SHGEMM_THREAD_TN
|
#define GEMM_THREAD_TR SBGEMM_THREAD_TN
|
||||||
#define GEMM_THREAD_CR SHGEMM_THREAD_TN
|
#define GEMM_THREAD_CR SBGEMM_THREAD_TN
|
||||||
#define GEMM_THREAD_RN SHGEMM_THREAD_NN
|
#define GEMM_THREAD_RN SBGEMM_THREAD_NN
|
||||||
#define GEMM_THREAD_RT SHGEMM_THREAD_NT
|
#define GEMM_THREAD_RT SBGEMM_THREAD_NT
|
||||||
#define GEMM_THREAD_RC SHGEMM_THREAD_NT
|
#define GEMM_THREAD_RC SBGEMM_THREAD_NT
|
||||||
#define GEMM_THREAD_RR SHGEMM_THREAD_NN
|
#define GEMM_THREAD_RR SBGEMM_THREAD_NN
|
||||||
|
|
||||||
#ifdef UNIT
|
#ifdef UNIT
|
||||||
|
|
||||||
|
@ -2485,9 +2485,9 @@
|
||||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)
|
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)
|
||||||
extern BLASLONG gemm_offset_a;
|
extern BLASLONG gemm_offset_a;
|
||||||
extern BLASLONG gemm_offset_b;
|
extern BLASLONG gemm_offset_b;
|
||||||
extern BLASLONG shgemm_p;
|
extern BLASLONG sbgemm_p;
|
||||||
extern BLASLONG shgemm_q;
|
extern BLASLONG sbgemm_q;
|
||||||
extern BLASLONG shgemm_r;
|
extern BLASLONG sbgemm_r;
|
||||||
extern BLASLONG sgemm_p;
|
extern BLASLONG sgemm_p;
|
||||||
extern BLASLONG sgemm_q;
|
extern BLASLONG sgemm_q;
|
||||||
extern BLASLONG sgemm_r;
|
extern BLASLONG sgemm_r;
|
||||||
|
|
|
@ -47,9 +47,9 @@ typedef struct {
|
||||||
int dtb_entries;
|
int dtb_entries;
|
||||||
int offsetA, offsetB, align;
|
int offsetA, offsetB, align;
|
||||||
|
|
||||||
#ifdef BUILD_HALF
|
#ifdef BUILD_BFLOAT16
|
||||||
int shgemm_p, shgemm_q, shgemm_r;
|
int sbgemm_p, sbgemm_q, sbgemm_r;
|
||||||
int shgemm_unroll_m, shgemm_unroll_n, shgemm_unroll_mn;
|
int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn;
|
||||||
|
|
||||||
float (*shamax_k) (BLASLONG, float *, BLASLONG);
|
float (*shamax_k) (BLASLONG, float *, BLASLONG);
|
||||||
float (*shamin_k) (BLASLONG, float *, BLASLONG);
|
float (*shamin_k) (BLASLONG, float *, BLASLONG);
|
||||||
|
@ -80,13 +80,13 @@ BLASLONG (*ishmin_k) (BLASLONG, float *, BLASLONG);
|
||||||
int (*shsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
int (*shsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||||
int (*shsymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
int (*shsymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||||
|
|
||||||
int (*shgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
|
int (*sbgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
|
||||||
int (*shgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
|
int (*sbgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
|
||||||
|
|
||||||
int (*shgemm_incopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
int (*sbgemm_incopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
||||||
int (*shgemm_itcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
int (*sbgemm_itcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
||||||
int (*shgemm_oncopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
int (*sbgemm_oncopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
||||||
int (*shgemm_otcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
int (*sbgemm_otcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
||||||
|
|
||||||
int (*shtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
int (*shtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||||
int (*shtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
int (*shtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||||
|
@ -1002,13 +1002,13 @@ extern gotoblas_t *gotoblas;
|
||||||
|
|
||||||
#define HAVE_EX_L2 gotoblas -> exclusive_cache
|
#define HAVE_EX_L2 gotoblas -> exclusive_cache
|
||||||
|
|
||||||
#ifdef BUILD_HALF
|
#ifdef BUILD_BFLOAT16
|
||||||
#define SHGEMM_P gotoblas -> shgemm_p
|
#define SBGEMM_P gotoblas -> sbgemm_p
|
||||||
#define SHGEMM_Q gotoblas -> shgemm_q
|
#define SBGEMM_Q gotoblas -> sbgemm_q
|
||||||
#define SHGEMM_R gotoblas -> shgemm_r
|
#define SBGEMM_R gotoblas -> sbgemm_r
|
||||||
#define SHGEMM_UNROLL_M gotoblas -> shgemm_unroll_m
|
#define SBGEMM_UNROLL_M gotoblas -> sbgemm_unroll_m
|
||||||
#define SHGEMM_UNROLL_N gotoblas -> shgemm_unroll_n
|
#define SBGEMM_UNROLL_N gotoblas -> sbgemm_unroll_n
|
||||||
#define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn
|
#define SBGEMM_UNROLL_MN gotoblas -> sbgemm_unroll_mn
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define SGEMM_P gotoblas -> sgemm_p
|
#define SGEMM_P gotoblas -> sgemm_p
|
||||||
|
@ -1088,16 +1088,16 @@ extern gotoblas_t *gotoblas;
|
||||||
#define HAVE_EX_L2 0
|
#define HAVE_EX_L2 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef BUILD_HALF
|
#ifdef BUILD_BFLOAT16
|
||||||
#define SHGEMM_P SHGEMM_DEFAULT_P
|
#define SBGEMM_P SBGEMM_DEFAULT_P
|
||||||
#define SHGEMM_Q SHGEMM_DEFAULT_Q
|
#define SBGEMM_Q SBGEMM_DEFAULT_Q
|
||||||
#define SHGEMM_R SHGEMM_DEFAULT_R
|
#define SBGEMM_R SBGEMM_DEFAULT_R
|
||||||
#define SHGEMM_UNROLL_M SHGEMM_DEFAULT_UNROLL_M
|
#define SBGEMM_UNROLL_M SBGEMM_DEFAULT_UNROLL_M
|
||||||
#define SHGEMM_UNROLL_N SHGEMM_DEFAULT_UNROLL_N
|
#define SBGEMM_UNROLL_N SBGEMM_DEFAULT_UNROLL_N
|
||||||
#ifdef SHGEMM_DEFAULT_UNROLL_MN
|
#ifdef SBGEMM_DEFAULT_UNROLL_MN
|
||||||
#define SHGEMM_UNROLL_MN SHGEMM_DEFAULT_UNROLL_MN
|
#define SBGEMM_UNROLL_MN SBGEMM_DEFAULT_UNROLL_MN
|
||||||
#else
|
#else
|
||||||
#define SHGEMM_UNROLL_MN MAX((SHGEMM_UNROLL_M), (SHGEMM_UNROLL_N))
|
#define SBGEMM_UNROLL_MN MAX((SBGEMM_UNROLL_M), (SBGEMM_UNROLL_N))
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1237,17 +1237,17 @@ extern gotoblas_t *gotoblas;
|
||||||
#define GEMM_DEFAULT_UNROLL_M DGEMM_DEFAULT_UNROLL_M
|
#define GEMM_DEFAULT_UNROLL_M DGEMM_DEFAULT_UNROLL_M
|
||||||
#define GEMM_DEFAULT_UNROLL_N DGEMM_DEFAULT_UNROLL_N
|
#define GEMM_DEFAULT_UNROLL_N DGEMM_DEFAULT_UNROLL_N
|
||||||
#elif defined(HALF)
|
#elif defined(BFLOAT16)
|
||||||
#define GEMM_P SHGEMM_P
|
#define GEMM_P SBGEMM_P
|
||||||
#define GEMM_Q SHGEMM_Q
|
#define GEMM_Q SBGEMM_Q
|
||||||
#define GEMM_R SHGEMM_R
|
#define GEMM_R SBGEMM_R
|
||||||
#define GEMM_UNROLL_M SHGEMM_UNROLL_M
|
#define GEMM_UNROLL_M SBGEMM_UNROLL_M
|
||||||
#define GEMM_UNROLL_N SHGEMM_UNROLL_N
|
#define GEMM_UNROLL_N SBGEMM_UNROLL_N
|
||||||
#define GEMM_UNROLL_MN SHGEMM_UNROLL_MN
|
#define GEMM_UNROLL_MN SBGEMM_UNROLL_MN
|
||||||
#define GEMM_DEFAULT_P SHGEMM_DEFAULT_P
|
#define GEMM_DEFAULT_P SBGEMM_DEFAULT_P
|
||||||
#define GEMM_DEFAULT_Q SHGEMM_DEFAULT_Q
|
#define GEMM_DEFAULT_Q SBGEMM_DEFAULT_Q
|
||||||
#define GEMM_DEFAULT_R SHGEMM_DEFAULT_R
|
#define GEMM_DEFAULT_R SBGEMM_DEFAULT_R
|
||||||
#define GEMM_DEFAULT_UNROLL_M SHGEMM_DEFAULT_UNROLL_M
|
#define GEMM_DEFAULT_UNROLL_M SBGEMM_DEFAULT_UNROLL_M
|
||||||
#define GEMM_DEFAULT_UNROLL_N SHGEMM_DEFAULT_UNROLL_N
|
#define GEMM_DEFAULT_UNROLL_N SBGEMM_DEFAULT_UNROLL_N
|
||||||
#else
|
#else
|
||||||
#define GEMM_P SGEMM_P
|
#define GEMM_P SGEMM_P
|
||||||
#define GEMM_Q SGEMM_Q
|
#define GEMM_Q SGEMM_Q
|
||||||
|
@ -1333,8 +1333,8 @@ extern gotoblas_t *gotoblas;
|
||||||
#define GEMM_THREAD gemm_thread_n
|
#define GEMM_THREAD gemm_thread_n
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SHGEMM_DEFAULT_R
|
#ifndef SBGEMM_DEFAULT_R
|
||||||
#define SHGEMM_DEFAULT_R (((BUFFER_SIZE - ((SHGEMM_DEFAULT_P * SHGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SHGEMM_DEFAULT_Q * 4) - 15) & ~15UL)
|
#define SBGEMM_DEFAULT_R (((BUFFER_SIZE - ((SBGEMM_DEFAULT_P * SBGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SBGEMM_DEFAULT_Q * 4) - 15) & ~15UL)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SGEMM_DEFAULT_R
|
#ifndef SGEMM_DEFAULT_R
|
||||||
|
|
|
@ -0,0 +1,65 @@
|
||||||
|
#ifndef COMMON_SB_H
|
||||||
|
#define COMMON_SB_H
|
||||||
|
|
||||||
|
#ifndef DYNAMIC_ARCH
|
||||||
|
|
||||||
|
#define SBGEMM_ONCOPY sbgemm_oncopy
|
||||||
|
#define SBGEMM_OTCOPY sbgemm_otcopy
|
||||||
|
|
||||||
|
#if SBGEMM_DEFAULT_UNROLL_M == SBGEMM_DEFAULT_UNROLL_N
|
||||||
|
#define SBGEMM_INCOPY sbgemm_oncopy
|
||||||
|
#define SBGEMM_ITCOPY sbgemm_otcopy
|
||||||
|
#else
|
||||||
|
#define SBGEMM_INCOPY sbgemm_incopy
|
||||||
|
#define SBGEMM_ITCOPY sbgemm_itcopy
|
||||||
|
#endif
|
||||||
|
#define SBGEMM_BETA sbgemm_beta
|
||||||
|
#define SBGEMM_KERNEL sbgemm_kernel
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define SBGEMM_ONCOPY gotoblas -> sbgemm_oncopy
|
||||||
|
#define SBGEMM_OTCOPY gotoblas -> sbgemm_otcopy
|
||||||
|
#define SBGEMM_INCOPY gotoblas -> sbgemm_incopy
|
||||||
|
#define SBGEMM_ITCOPY gotoblas -> sbgemm_itcopy
|
||||||
|
#define SBGEMM_BETA gotoblas -> sbgemm_beta
|
||||||
|
#define SBGEMM_KERNEL gotoblas -> sbgemm_kernel
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define SBGEMM_NN sbgemm_nn
|
||||||
|
#define SBGEMM_CN sbgemm_tn
|
||||||
|
#define SBGEMM_TN sbgemm_tn
|
||||||
|
#define SBGEMM_NC sbgemm_nt
|
||||||
|
#define SBGEMM_NT sbgemm_nt
|
||||||
|
#define SBGEMM_CC sbgemm_tt
|
||||||
|
#define SBGEMM_CT sbgemm_tt
|
||||||
|
#define SBGEMM_TC sbgemm_tt
|
||||||
|
#define SBGEMM_TT sbgemm_tt
|
||||||
|
#define SBGEMM_NR sbgemm_nn
|
||||||
|
#define SBGEMM_TR sbgemm_tn
|
||||||
|
#define SBGEMM_CR sbgemm_tn
|
||||||
|
#define SBGEMM_RN sbgemm_nn
|
||||||
|
#define SBGEMM_RT sbgemm_nt
|
||||||
|
#define SBGEMM_RC sbgemm_nt
|
||||||
|
#define SBGEMM_RR sbgemm_nn
|
||||||
|
|
||||||
|
#define SBGEMM_THREAD_NN sbgemm_thread_nn
|
||||||
|
#define SBGEMM_THREAD_CN sbgemm_thread_tn
|
||||||
|
#define SBGEMM_THREAD_TN sbgemm_thread_tn
|
||||||
|
#define SBGEMM_THREAD_NC sbgemm_thread_nt
|
||||||
|
#define SBGEMM_THREAD_NT sbgemm_thread_nt
|
||||||
|
#define SBGEMM_THREAD_CC sbgemm_thread_tt
|
||||||
|
#define SBGEMM_THREAD_CT sbgemm_thread_tt
|
||||||
|
#define SBGEMM_THREAD_TC sbgemm_thread_tt
|
||||||
|
#define SBGEMM_THREAD_TT sbgemm_thread_tt
|
||||||
|
#define SBGEMM_THREAD_NR sbgemm_thread_nn
|
||||||
|
#define SBGEMM_THREAD_TR sbgemm_thread_tn
|
||||||
|
#define SBGEMM_THREAD_CR sbgemm_thread_tn
|
||||||
|
#define SBGEMM_THREAD_RN sbgemm_thread_nn
|
||||||
|
#define SBGEMM_THREAD_RT sbgemm_thread_nt
|
||||||
|
#define SBGEMM_THREAD_RC sbgemm_thread_nt
|
||||||
|
#define SBGEMM_THREAD_RR sbgemm_thread_nn
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
65
common_sh.h
65
common_sh.h
|
@ -1,65 +0,0 @@
|
||||||
#ifndef COMMON_SH_H
|
|
||||||
#define COMMON_SH_H
|
|
||||||
|
|
||||||
#ifndef DYNAMIC_ARCH
|
|
||||||
|
|
||||||
#define SHGEMM_ONCOPY shgemm_oncopy
|
|
||||||
#define SHGEMM_OTCOPY shgemm_otcopy
|
|
||||||
|
|
||||||
#if SHGEMM_DEFAULT_UNROLL_M == SHGEMM_DEFAULT_UNROLL_N
|
|
||||||
#define SHGEMM_INCOPY shgemm_oncopy
|
|
||||||
#define SHGEMM_ITCOPY shgemm_otcopy
|
|
||||||
#else
|
|
||||||
#define SHGEMM_INCOPY shgemm_incopy
|
|
||||||
#define SHGEMM_ITCOPY shgemm_itcopy
|
|
||||||
#endif
|
|
||||||
#define SHGEMM_BETA shgemm_beta
|
|
||||||
#define SHGEMM_KERNEL shgemm_kernel
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#define SHGEMM_ONCOPY gotoblas -> shgemm_oncopy
|
|
||||||
#define SHGEMM_OTCOPY gotoblas -> shgemm_otcopy
|
|
||||||
#define SHGEMM_INCOPY gotoblas -> shgemm_incopy
|
|
||||||
#define SHGEMM_ITCOPY gotoblas -> shgemm_itcopy
|
|
||||||
#define SHGEMM_BETA gotoblas -> shgemm_beta
|
|
||||||
#define SHGEMM_KERNEL gotoblas -> shgemm_kernel
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define SHGEMM_NN shgemm_nn
|
|
||||||
#define SHGEMM_CN shgemm_tn
|
|
||||||
#define SHGEMM_TN shgemm_tn
|
|
||||||
#define SHGEMM_NC shgemm_nt
|
|
||||||
#define SHGEMM_NT shgemm_nt
|
|
||||||
#define SHGEMM_CC shgemm_tt
|
|
||||||
#define SHGEMM_CT shgemm_tt
|
|
||||||
#define SHGEMM_TC shgemm_tt
|
|
||||||
#define SHGEMM_TT shgemm_tt
|
|
||||||
#define SHGEMM_NR shgemm_nn
|
|
||||||
#define SHGEMM_TR shgemm_tn
|
|
||||||
#define SHGEMM_CR shgemm_tn
|
|
||||||
#define SHGEMM_RN shgemm_nn
|
|
||||||
#define SHGEMM_RT shgemm_nt
|
|
||||||
#define SHGEMM_RC shgemm_nt
|
|
||||||
#define SHGEMM_RR shgemm_nn
|
|
||||||
|
|
||||||
#define SHGEMM_THREAD_NN shgemm_thread_nn
|
|
||||||
#define SHGEMM_THREAD_CN shgemm_thread_tn
|
|
||||||
#define SHGEMM_THREAD_TN shgemm_thread_tn
|
|
||||||
#define SHGEMM_THREAD_NC shgemm_thread_nt
|
|
||||||
#define SHGEMM_THREAD_NT shgemm_thread_nt
|
|
||||||
#define SHGEMM_THREAD_CC shgemm_thread_tt
|
|
||||||
#define SHGEMM_THREAD_CT shgemm_thread_tt
|
|
||||||
#define SHGEMM_THREAD_TC shgemm_thread_tt
|
|
||||||
#define SHGEMM_THREAD_TT shgemm_thread_tt
|
|
||||||
#define SHGEMM_THREAD_NR shgemm_thread_nn
|
|
||||||
#define SHGEMM_THREAD_TR shgemm_thread_tn
|
|
||||||
#define SHGEMM_THREAD_CR shgemm_thread_tn
|
|
||||||
#define SHGEMM_THREAD_RN shgemm_thread_nn
|
|
||||||
#define SHGEMM_THREAD_RT shgemm_thread_nt
|
|
||||||
#define SHGEMM_THREAD_RC shgemm_thread_nt
|
|
||||||
#define SHGEMM_THREAD_RR shgemm_thread_nn
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -19,8 +19,8 @@ ifeq ($(ARCH), MIPS)
|
||||||
USE_GEMM3M = 1
|
USE_GEMM3M = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
SHBLASOBJS += shgemm_nn.$(SUFFIX) shgemm_nt.$(SUFFIX) shgemm_tn.$(SUFFIX) shgemm_tt.$(SUFFIX)
|
SBBLASOBJS += sbgemm_nn.$(SUFFIX) sbgemm_nt.$(SUFFIX) sbgemm_tn.$(SUFFIX) sbgemm_tt.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
SBLASOBJS += \
|
SBLASOBJS += \
|
||||||
|
@ -207,8 +207,8 @@ COMMONOBJS += gemm_thread_m.$(SUFFIX) gemm_thread_n.$(SUFFIX) gemm_thread_mn.$(
|
||||||
COMMONOBJS += syrk_thread.$(SUFFIX)
|
COMMONOBJS += syrk_thread.$(SUFFIX)
|
||||||
|
|
||||||
ifndef USE_SIMPLE_THREADED_LEVEL3
|
ifndef USE_SIMPLE_THREADED_LEVEL3
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
SHBLASOBJS += shgemm_thread_nn.$(SUFFIX) shgemm_thread_nt.$(SUFFIX) shgemm_thread_tn.$(SUFFIX) shgemm_thread_tt.$(SUFFIX)
|
SBBLASOBJS += sbgemm_thread_nn.$(SUFFIX) sbgemm_thread_nt.$(SUFFIX) sbgemm_thread_tn.$(SUFFIX) sbgemm_thread_tt.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX)
|
SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX)
|
||||||
DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX)
|
DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX)
|
||||||
|
@ -289,17 +289,17 @@ endif
|
||||||
|
|
||||||
all ::
|
all ::
|
||||||
|
|
||||||
shgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h
|
sbgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h
|
||||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
$(CC) $(CFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
||||||
|
|
||||||
shgemm_nt.$(SUFFIX) : gemm.c level3.c ../../param.h
|
sbgemm_nt.$(SUFFIX) : gemm.c level3.c ../../param.h
|
||||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)
|
$(CC) $(CFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)
|
||||||
|
|
||||||
shgemm_tn.$(SUFFIX) : gemm.c level3.c ../../param.h
|
sbgemm_tn.$(SUFFIX) : gemm.c level3.c ../../param.h
|
||||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)
|
$(CC) $(CFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)
|
||||||
|
|
||||||
shgemm_tt.$(SUFFIX) : gemm.c level3.c ../../param.h
|
sbgemm_tt.$(SUFFIX) : gemm.c level3.c ../../param.h
|
||||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)
|
$(CC) $(CFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)
|
||||||
|
|
||||||
sgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h
|
sgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h
|
||||||
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
$(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
||||||
|
@ -496,17 +496,17 @@ gemm_thread_variable.$(SUFFIX) : gemm_thread_variable.c ../../common.h
|
||||||
beta_thread.$(SUFFIX) : beta_thread.c ../../common.h
|
beta_thread.$(SUFFIX) : beta_thread.c ../../common.h
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
shgemm_thread_nn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
|
sbgemm_thread_nn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
|
||||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
||||||
|
|
||||||
shgemm_thread_nt.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
|
sbgemm_thread_nt.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
|
||||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)
|
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)
|
||||||
|
|
||||||
shgemm_thread_tn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
|
sbgemm_thread_tn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
|
||||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)
|
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)
|
||||||
|
|
||||||
shgemm_thread_tt.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
|
sbgemm_thread_tt.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
|
||||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)
|
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)
|
||||||
|
|
||||||
sgemm_thread_nn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
|
sgemm_thread_nn.$(SUFFIX) : gemm.c level3_thread.c ../../param.h
|
||||||
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
$(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
||||||
|
@ -2681,17 +2681,17 @@ xtrsm_RCLU.$(SUFFIX) : trsm_R.c
|
||||||
xtrsm_RCLN.$(SUFFIX) : trsm_R.c
|
xtrsm_RCLN.$(SUFFIX) : trsm_R.c
|
||||||
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA -UUPPER -UUNIT -DCONJ $< -o $(@F)
|
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DTRANSA -UUPPER -UUNIT -DCONJ $< -o $(@F)
|
||||||
|
|
||||||
shgemm_nn.$(PSUFFIX) : gemm.c level3.c ../../param.h
|
sbgemm_nn.$(PSUFFIX) : gemm.c level3.c ../../param.h
|
||||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
$(CC) $(PFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
||||||
|
|
||||||
shgemm_nt.$(PSUFFIX) : gemm.c level3.c ../../param.h
|
sbgemm_nt.$(PSUFFIX) : gemm.c level3.c ../../param.h
|
||||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)
|
$(CC) $(PFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)
|
||||||
|
|
||||||
shgemm_tn.$(PSUFFIX) : gemm.c level3.c ../../param.h
|
sbgemm_tn.$(PSUFFIX) : gemm.c level3.c ../../param.h
|
||||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)
|
$(CC) $(PFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)
|
||||||
|
|
||||||
shgemm_tt.$(PSUFFIX) : gemm.c level3.c ../../param.h
|
sbgemm_tt.$(PSUFFIX) : gemm.c level3.c ../../param.h
|
||||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)
|
$(CC) $(PFLAGS) $(BLOCKS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)
|
||||||
|
|
||||||
sgemm_nn.$(PSUFFIX) : gemm.c level3.c ../../param.h
|
sgemm_nn.$(PSUFFIX) : gemm.c level3.c ../../param.h
|
||||||
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
$(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
||||||
|
@ -2889,17 +2889,17 @@ beta_thread.$(PSUFFIX) : beta_thread.c ../../common.h
|
||||||
$(CC) -c $(PFLAGS) $< -o $(@F)
|
$(CC) -c $(PFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
|
|
||||||
shgemm_thread_nn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
|
sbgemm_thread_nn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
|
||||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
||||||
|
|
||||||
shgemm_thread_nt.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
|
sbgemm_thread_nt.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
|
||||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)
|
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DNT $< -o $(@F)
|
||||||
|
|
||||||
shgemm_thread_tn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
|
sbgemm_thread_tn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
|
||||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)
|
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTN $< -o $(@F)
|
||||||
|
|
||||||
shgemm_thread_tt.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
|
sbgemm_thread_tt.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
|
||||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DHALF -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)
|
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DBFLOAT16 -UDOUBLE -UCOMPLEX -DTT $< -o $(@F)
|
||||||
|
|
||||||
sgemm_thread_nn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
|
sgemm_thread_nn.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h
|
||||||
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
$(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -UCOMPLEX -DNN $< -o $(@F)
|
||||||
|
|
|
@ -62,10 +62,10 @@ BLASLONG gemm_offset_b = DEFAULT_GEMM_OFFSET_B;
|
||||||
BLASLONG gemm_offset_b = GEMM_OFFSET_B;
|
BLASLONG gemm_offset_b = GEMM_OFFSET_B;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if SHGEMM_P == shgemm_p
|
#if SBGEMM_P == sbgemm_p
|
||||||
BLASLONG shgemm_p = DEFAULT_GEMM_P;
|
BLASLONG sbgemm_p = DEFAULT_GEMM_P;
|
||||||
#else
|
#else
|
||||||
BLASLONG shgemm_p = SHGEMM_P;
|
BLASLONG sbgemm_p = SBGEMM_P;
|
||||||
#endif
|
#endif
|
||||||
#if SGEMM_P == sgemm_p
|
#if SGEMM_P == sgemm_p
|
||||||
BLASLONG sgemm_p = DEFAULT_GEMM_P;
|
BLASLONG sgemm_p = DEFAULT_GEMM_P;
|
||||||
|
@ -88,10 +88,10 @@ BLASLONG zgemm_p = DEFAULT_GEMM_P;
|
||||||
BLASLONG zgemm_p = ZGEMM_P;
|
BLASLONG zgemm_p = ZGEMM_P;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if SHGEMM_Q == shgemm_q
|
#if SBGEMM_Q == sbgemm_q
|
||||||
BLASLONG shgemm_q = DEFAULT_GEMM_Q;
|
BLASLONG sbgemm_q = DEFAULT_GEMM_Q;
|
||||||
#else
|
#else
|
||||||
BLASLONG shgemm_q = SHGEMM_Q;
|
BLASLONG sbgemm_q = SBGEMM_Q;
|
||||||
#endif
|
#endif
|
||||||
#if SGEMM_Q == sgemm_q
|
#if SGEMM_Q == sgemm_q
|
||||||
BLASLONG sgemm_q = DEFAULT_GEMM_Q;
|
BLASLONG sgemm_q = DEFAULT_GEMM_Q;
|
||||||
|
@ -114,10 +114,10 @@ BLASLONG zgemm_q = DEFAULT_GEMM_Q;
|
||||||
BLASLONG zgemm_q = ZGEMM_Q;
|
BLASLONG zgemm_q = ZGEMM_Q;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if SHGEMM_R == shgemm_r
|
#if SBGEMM_R == sbgemm_r
|
||||||
BLASLONG shgemm_r = DEFAULT_GEMM_R;
|
BLASLONG sbgemm_r = DEFAULT_GEMM_R;
|
||||||
#else
|
#else
|
||||||
BLASLONG shgemm_r = SHGEMM_R;
|
BLASLONG sbgemm_r = SBGEMM_R;
|
||||||
#endif
|
#endif
|
||||||
#if SGEMM_R == sgemm_r
|
#if SGEMM_R == sgemm_r
|
||||||
BLASLONG sgemm_r = DEFAULT_GEMM_R;
|
BLASLONG sgemm_r = DEFAULT_GEMM_R;
|
||||||
|
@ -612,7 +612,7 @@ void blas_set_parameter(void){
|
||||||
|
|
||||||
size = BITMASK(cpuid3, 16, 0xff);
|
size = BITMASK(cpuid3, 16, 0xff);
|
||||||
|
|
||||||
shgemm_p = 192 * (size + 1);
|
sbgemm_p = 192 * (size + 1);
|
||||||
sgemm_p = 192 * (size + 1);
|
sgemm_p = 192 * (size + 1);
|
||||||
dgemm_p = 96 * (size + 1);
|
dgemm_p = 96 * (size + 1);
|
||||||
cgemm_p = 96 * (size + 1);
|
cgemm_p = 96 * (size + 1);
|
||||||
|
@ -626,7 +626,7 @@ void blas_set_parameter(void){
|
||||||
xgemm_p = 16 * (size + 1);
|
xgemm_p = 16 * (size + 1);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
shgemm_r = (((BUFFER_SIZE - ((SHGEMM_P * SHGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SHGEMM_Q * 4)) - 15) & ~15;
|
sbgemm_r = (((BUFFER_SIZE - ((SBGEMM_P * SBGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SBGEMM_Q * 4)) - 15) & ~15;
|
||||||
sgemm_r = (((BUFFER_SIZE - ((SGEMM_P * SGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SGEMM_Q * 4)) - 15) & ~15;
|
sgemm_r = (((BUFFER_SIZE - ((SGEMM_P * SGEMM_Q * 4 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (SGEMM_Q * 4)) - 15) & ~15;
|
||||||
dgemm_r = (((BUFFER_SIZE - ((DGEMM_P * DGEMM_Q * 8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (DGEMM_Q * 8)) - 15) & ~15;
|
dgemm_r = (((BUFFER_SIZE - ((DGEMM_P * DGEMM_Q * 8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (DGEMM_Q * 8)) - 15) & ~15;
|
||||||
cgemm_r = (((BUFFER_SIZE - ((CGEMM_P * CGEMM_Q * 8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (CGEMM_Q * 8)) - 15) & ~15;
|
cgemm_r = (((BUFFER_SIZE - ((CGEMM_P * CGEMM_Q * 8 + GEMM_OFFSET_A + GEMM_ALIGN) & ~GEMM_ALIGN)) / (CGEMM_Q * 8)) - 15) & ~15;
|
||||||
|
|
|
@ -30,8 +30,8 @@ ifndef BUILD_LAPACK_DEPRECATED
|
||||||
BUILD_LAPACK_DEPRECATED = 0
|
BUILD_LAPACK_DEPRECATED = 0
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef BUILD_HALF
|
ifndef BUILD_BFLOAT16
|
||||||
BUILD_HALF = 0
|
BUILD_BFLOAT16 = 0
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), WINNT)
|
ifeq ($(OSNAME), WINNT)
|
||||||
|
@ -246,23 +246,23 @@ static : ../$(LIBNAME)
|
||||||
rm -f goto.$(SUFFIX)
|
rm -f goto.$(SUFFIX)
|
||||||
|
|
||||||
osx.def : gensymbol ../Makefile.system ../getarch.c
|
osx.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
|
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) > $(@F)
|
||||||
|
|
||||||
aix.def : gensymbol ../Makefile.system ../getarch.c
|
aix.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
|
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) > $(@F)
|
||||||
|
|
||||||
objcopy.def : gensymbol ../Makefile.system ../getarch.c
|
objcopy.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
|
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) > $(@F)
|
||||||
|
|
||||||
objconv.def : gensymbol ../Makefile.system ../getarch.c
|
objconv.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
|
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) > $(@F)
|
||||||
|
|
||||||
test : linktest.c
|
test : linktest.c
|
||||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
||||||
rm -f linktest
|
rm -f linktest
|
||||||
|
|
||||||
linktest.c : gensymbol ../Makefile.system ../getarch.c
|
linktest.c : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > linktest.c
|
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) > linktest.c
|
||||||
|
|
||||||
clean ::
|
clean ::
|
||||||
@rm -f *.def *.dylib __.SYMDEF* *.renamed
|
@rm -f *.def *.dylib __.SYMDEF* *.renamed
|
||||||
|
|
|
@ -46,7 +46,7 @@
|
||||||
ssum, dsum, scsum, dzsum
|
ssum, dsum, scsum, dzsum
|
||||||
);
|
);
|
||||||
|
|
||||||
@halfblasobjs = (shgemm);
|
@halfblasobjs = (sbgemm);
|
||||||
@cblasobjs = (
|
@cblasobjs = (
|
||||||
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
|
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
|
||||||
cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k,
|
cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k,
|
||||||
|
@ -84,7 +84,7 @@
|
||||||
cblas_xerbla
|
cblas_xerbla
|
||||||
);
|
);
|
||||||
|
|
||||||
@halfcblasobjs = (cblas_shgemm);
|
@halfcblasobjs = (cblas_sbgemm);
|
||||||
|
|
||||||
@exblasobjs = (
|
@exblasobjs = (
|
||||||
qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm,
|
qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm,
|
||||||
|
|
|
@ -9,8 +9,8 @@
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
|
|
||||||
if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) {
|
if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) {
|
||||||
printf("SHGEMM_UNROLL_M=%d\n", SHGEMM_DEFAULT_UNROLL_M);
|
printf("SBGEMM_UNROLL_M=%d\n", SBGEMM_DEFAULT_UNROLL_M);
|
||||||
printf("SHGEMM_UNROLL_N=%d\n", SHGEMM_DEFAULT_UNROLL_N);
|
printf("SBGEMM_UNROLL_N=%d\n", SBGEMM_DEFAULT_UNROLL_N);
|
||||||
printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M);
|
printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M);
|
||||||
printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N);
|
printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N);
|
||||||
printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M);
|
printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M);
|
||||||
|
|
|
@ -46,8 +46,8 @@ SBLAS3OBJS = \
|
||||||
somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\
|
somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\
|
||||||
sgeadd.$(SUFFIX)
|
sgeadd.$(SUFFIX)
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
SHBLAS3OBJS = shgemm.$(SUFFIX)
|
SBBLAS3OBJS = sbgemm.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
DBLAS1OBJS = \
|
DBLAS1OBJS = \
|
||||||
|
@ -280,8 +280,8 @@ CSBLAS3OBJS = \
|
||||||
cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\
|
cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\
|
||||||
cblas_sgeadd.$(SUFFIX)
|
cblas_sgeadd.$(SUFFIX)
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
CSHBLAS3OBJS = cblas_shgemm.$(SUFFIX)
|
CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
CDBLAS1OBJS = \
|
CDBLAS1OBJS = \
|
||||||
|
@ -374,7 +374,7 @@ override CFLAGS += -I.
|
||||||
SBLAS1OBJS += $(CSBLAS1OBJS)
|
SBLAS1OBJS += $(CSBLAS1OBJS)
|
||||||
SBLAS2OBJS += $(CSBLAS2OBJS)
|
SBLAS2OBJS += $(CSBLAS2OBJS)
|
||||||
SBLAS3OBJS += $(CSBLAS3OBJS)
|
SBLAS3OBJS += $(CSBLAS3OBJS)
|
||||||
SHBLAS3OBJS += $(CSHBLAS3OBJS)
|
SBBLAS3OBJS += $(CSBBLAS3OBJS)
|
||||||
DBLAS1OBJS += $(CDBLAS1OBJS)
|
DBLAS1OBJS += $(CDBLAS1OBJS)
|
||||||
DBLAS2OBJS += $(CDBLAS2OBJS)
|
DBLAS2OBJS += $(CDBLAS2OBJS)
|
||||||
DBLAS3OBJS += $(CDBLAS3OBJS)
|
DBLAS3OBJS += $(CDBLAS3OBJS)
|
||||||
|
@ -388,7 +388,7 @@ ZBLAS3OBJS += $(CZBLAS3OBJS)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
|
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
|
||||||
SHBLASOBJS = $(SHBLAS3OBJS)
|
SBBLASOBJS = $(SBBLAS3OBJS)
|
||||||
DBLASOBJS = $(DBLAS1OBJS) $(DBLAS2OBJS) $(DBLAS3OBJS)
|
DBLASOBJS = $(DBLAS1OBJS) $(DBLAS2OBJS) $(DBLAS3OBJS)
|
||||||
QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS)
|
QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS)
|
||||||
CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
|
CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
|
||||||
|
@ -463,7 +463,7 @@ ZBLASOBJS += $(ZLAPACKOBJS)
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
FUNCOBJS = $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
|
FUNCOBJS = $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
|
||||||
|
|
||||||
ifdef EXPRECISION
|
ifdef EXPRECISION
|
||||||
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||||
|
@ -497,10 +497,10 @@ level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $
|
||||||
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
|
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
|
||||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||||
|
|
||||||
level3 : $(SHBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS)
|
level3 : $(SBBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS)
|
||||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||||
|
|
||||||
$(CSHBLASOBJS) $(CSHBLASOBJS_P) $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \
|
$(CSBBLASOBJS) $(CSBBLASOBJS_P) $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \
|
||||||
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS
|
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS
|
||||||
|
|
||||||
srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c
|
srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c
|
||||||
|
@ -1218,8 +1218,8 @@ zhpr2.$(SUFFIX) zhpr2.$(PSUFFIX) : zhpr2.c
|
||||||
xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c
|
xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
shgemm.$(SUFFIX) shgemm.$(PSUFFIX) : gemm.c ../param.h
|
sbgemm.$(SUFFIX) sbgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -1784,8 +1784,8 @@ cblas_zhemv.$(SUFFIX) cblas_zhemv.$(PSUFFIX) : zhemv.c
|
||||||
cblas_sgemm.$(SUFFIX) cblas_sgemm.$(PSUFFIX) : gemm.c ../param.h
|
cblas_sgemm.$(SUFFIX) cblas_sgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
cblas_shgemm.$(SUFFIX) cblas_shgemm.$(PSUFFIX) : gemm.c ../param.h
|
cblas_sbgemm.$(SUFFIX) cblas_sbgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
|
@ -41,8 +41,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
||||||
foreach (float_type ${FLOAT_TYPES})
|
foreach (float_type ${FLOAT_TYPES})
|
||||||
# a bit of metaprogramming here to pull out the appropriate KERNEL var
|
# a bit of metaprogramming here to pull out the appropriate KERNEL var
|
||||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||||
if (${float_type} STREQUAL "HALF")
|
if (${float_type} STREQUAL "BFLOAT16")
|
||||||
set (float_char "SH")
|
set (float_char "SB")
|
||||||
endif ()
|
endif ()
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type})
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type})
|
||||||
|
@ -96,8 +96,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
||||||
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
|
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
|
||||||
foreach (float_type ${FLOAT_TYPES})
|
foreach (float_type ${FLOAT_TYPES})
|
||||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||||
if (${float_type} STREQUAL "HALF")
|
if (${float_type} STREQUAL "BFLOAT16")
|
||||||
set (float_char "SH")
|
set (float_char "SB")
|
||||||
endif ()
|
endif ()
|
||||||
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type})
|
||||||
|
@ -134,13 +134,13 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
||||||
set(USE_TRMM true)
|
set(USE_TRMM true)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
foreach (float_type SINGLE DOUBLE HALF)
|
foreach (float_type SINGLE DOUBLE BFLOAT16)
|
||||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||||
if (${float_type} STREQUAL "HALF")
|
if (${float_type} STREQUAL "BFLOAT16")
|
||||||
if (NOT ${BUILD_HALF})
|
if (NOT ${BUILD_BFLOAT16})
|
||||||
continue ()
|
continue ()
|
||||||
else ()
|
else ()
|
||||||
set (float_char "SH")
|
set (float_char "SB")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})
|
||||||
|
@ -148,8 +148,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
||||||
|
|
||||||
foreach (float_type ${FLOAT_TYPES})
|
foreach (float_type ${FLOAT_TYPES})
|
||||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||||
if (${float_type} STREQUAL "HALF")
|
if (${float_type} STREQUAL "BFLOAT16")
|
||||||
set (float_char "SH")
|
set (float_char "SB")
|
||||||
endif ()
|
endif ()
|
||||||
if (${float_char}GEMMINCOPY)
|
if (${float_char}GEMMINCOPY)
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type})
|
||||||
|
@ -490,8 +490,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
||||||
# Makefile.LA
|
# Makefile.LA
|
||||||
if(NOT NO_LAPACK)
|
if(NOT NO_LAPACK)
|
||||||
foreach (float_type ${FLOAT_TYPES})
|
foreach (float_type ${FLOAT_TYPES})
|
||||||
if (${float_type} STREQUAL "HALF")
|
if (${float_type} STREQUAL "BFLOAT16")
|
||||||
set (float_char "SH")
|
set (float_char "SB")
|
||||||
endif ()
|
endif ()
|
||||||
if (NOT DEFINED ${float_char}NEG_TCOPY)
|
if (NOT DEFINED ${float_char}NEG_TCOPY)
|
||||||
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X")
|
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X")
|
||||||
|
@ -536,8 +536,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
||||||
foreach (float_type ${FLOAT_TYPES})
|
foreach (float_type ${FLOAT_TYPES})
|
||||||
# a bit of metaprogramming here to pull out the appropriate KERNEL var
|
# a bit of metaprogramming here to pull out the appropriate KERNEL var
|
||||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||||
if (${float_type} STREQUAL "HALF")
|
if (${float_type} STREQUAL "BFLOAT16")
|
||||||
set (float_char "SH")
|
set (float_char "SB")
|
||||||
endif ()
|
endif ()
|
||||||
GenerateNamedObjects("generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false ${float_type})
|
GenerateNamedObjects("generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false ${float_type})
|
||||||
GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type})
|
GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type})
|
||||||
|
|
|
@ -65,24 +65,24 @@ ifeq ($(CORE), Z14)
|
||||||
USE_TRMM = 1
|
USE_TRMM = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF), 1)
|
ifeq ($(BUILD_BFLOAT16), 1)
|
||||||
ifndef SHGEMMKERNEL
|
ifndef SBGEMMKERNEL
|
||||||
SHGEMM_BETA = ../generic/gemm_beta.c
|
SBGEMM_BETA = ../generic/gemm_beta.c
|
||||||
SHGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
SBGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
SHGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
SBGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
||||||
SHGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
SBGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
||||||
SHGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
SBGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
SHGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
SBGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX)
|
SBGEMMINCOPYOBJ = sbgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
SHKERNELOBJS += \
|
SHKERNELOBJS += \
|
||||||
shgemm_kernel$(TSUFFIX).$(SUFFIX) \
|
sbgemm_kernel$(TSUFFIX).$(SUFFIX) \
|
||||||
$(SHGEMMINCOPYOBJ) $(SHGEMMITCOPYOBJ) \
|
$(SBGEMMINCOPYOBJ) $(SBGEMMITCOPYOBJ) \
|
||||||
$(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ)
|
$(SBGEMMONCOPYOBJ) $(SBGEMMOTCOPYOBJ)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
SKERNELOBJS += \
|
SKERNELOBJS += \
|
||||||
|
@ -118,8 +118,8 @@ XKERNELOBJS += \
|
||||||
$(XGEMMINCOPYOBJ) $(XGEMMITCOPYOBJ) \
|
$(XGEMMINCOPYOBJ) $(XGEMMITCOPYOBJ) \
|
||||||
$(XGEMMONCOPYOBJ) $(XGEMMOTCOPYOBJ)
|
$(XGEMMONCOPYOBJ) $(XGEMMOTCOPYOBJ)
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
SHBLASOBJS += $(SHKERNELOBJS)
|
SBBLASOBJS += $(SHKERNELOBJS)
|
||||||
endif
|
endif
|
||||||
SBLASOBJS += $(SKERNELOBJS)
|
SBLASOBJS += $(SKERNELOBJS)
|
||||||
DBLASOBJS += $(DKERNELOBJS)
|
DBLASOBJS += $(DKERNELOBJS)
|
||||||
|
@ -128,8 +128,8 @@ CBLASOBJS += $(CKERNELOBJS)
|
||||||
ZBLASOBJS += $(ZKERNELOBJS)
|
ZBLASOBJS += $(ZKERNELOBJS)
|
||||||
XBLASOBJS += $(XKERNELOBJS)
|
XBLASOBJS += $(XKERNELOBJS)
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX)
|
SBBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
SBLASOBJS += \
|
SBLASOBJS += \
|
||||||
|
@ -421,11 +421,11 @@ ZBLASOBJS += \
|
||||||
zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \
|
zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \
|
||||||
zgeadd_k$(TSUFFIX).$(SUFFIX)
|
zgeadd_k$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF), 1)
|
ifeq ($(BUILD_BFLOAT16), 1)
|
||||||
SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
SBGEMMINCOPYOBJ_P = $(SBGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
SHGEMMITCOPYOBJ_P = $(SHGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
SBGEMMITCOPYOBJ_P = $(SBGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
SHGEMMONCOPYOBJ_P = $(SHGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
SBGEMMONCOPYOBJ_P = $(SBGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
SHGEMMOTCOPYOBJ_P = $(SHGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
SBGEMMOTCOPYOBJ_P = $(SBGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
endif
|
endif
|
||||||
|
|
||||||
SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
|
@ -453,9 +453,9 @@ XGEMMITCOPYOBJ_P = $(XGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
XGEMMONCOPYOBJ_P = $(XGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
XGEMMONCOPYOBJ_P = $(XGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
$(KDIR)shgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA)
|
$(KDIR)sbgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA)
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
endif
|
endif
|
||||||
|
|
||||||
$(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA)
|
$(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA)
|
||||||
|
@ -477,35 +477,35 @@ $(KDIR)xgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMM_BETA)
|
||||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@
|
||||||
|
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF), 1)
|
ifeq ($(BUILD_BFLOAT16), 1)
|
||||||
|
|
||||||
$(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY)
|
$(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY)
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
|
||||||
$(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY)
|
$(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY)
|
||||||
|
|
||||||
ifeq ($(OS), AIX)
|
ifeq ($(OS), AIX)
|
||||||
$(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemmotcopy.s
|
$(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmotcopy.s
|
||||||
m4 shgemmotcopy.s > shgemmotcopy_nomacros.s
|
m4 sbgemmotcopy.s > sbgemmotcopy_nomacros.s
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmotcopy_nomacros.s -o $@
|
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmotcopy_nomacros.s -o $@
|
||||||
rm shgemmotcopy.s shgemmotcopy_nomacros.s
|
rm sbgemmotcopy.s sbgemmotcopy_nomacros.s
|
||||||
else
|
else
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
|
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
|
||||||
|
|
||||||
$(KDIR)$(SHGEMMINCOPYOBJ) : $(KERNELDIR)/$(SHGEMMINCOPY)
|
$(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY)
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
|
||||||
$(KDIR)$(SHGEMMITCOPYOBJ) : $(KERNELDIR)/$(SHGEMMITCOPY)
|
$(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY)
|
||||||
ifeq ($(OS), AIX)
|
ifeq ($(OS), AIX)
|
||||||
$(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemmitcopy.s
|
$(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmitcopy.s
|
||||||
m4 shgemmitcopy.s > shgemmitcopy_nomacros.s
|
m4 sbgemmitcopy.s > sbgemmitcopy_nomacros.s
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmitcopy_nomacros.s -o $@
|
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmitcopy_nomacros.s -o $@
|
||||||
rm shgemmitcopy.s shgemmitcopy_nomacros.s
|
rm sbgemmitcopy.s sbgemmitcopy_nomacros.s
|
||||||
else
|
else
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
endif
|
endif
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
@ -668,16 +668,16 @@ else
|
||||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF), 1)
|
ifeq ($(BUILD_BFLOAT16), 1)
|
||||||
|
|
||||||
$(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND)
|
$(KDIR)sbgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND)
|
||||||
ifeq ($(OS), AIX)
|
ifeq ($(OS), AIX)
|
||||||
$(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemm_kernel$(TSUFFIX).s
|
$(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemm_kernel$(TSUFFIX).s
|
||||||
m4 shgemm_kernel$(TSUFFIX).s > shgemm_kernel$(TSUFFIX)_nomacros.s
|
m4 sbgemm_kernel$(TSUFFIX).s > sbgemm_kernel$(TSUFFIX)_nomacros.s
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemm_kernel$(TSUFFIX)_nomacros.s -o $@
|
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemm_kernel$(TSUFFIX)_nomacros.s -o $@
|
||||||
rm shgemm_kernel$(TSUFFIX).s shgemm_kernel$(TSUFFIX)_nomacros.s
|
rm sbgemm_kernel$(TSUFFIX).s sbgemm_kernel$(TSUFFIX)_nomacros.s
|
||||||
else
|
else
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -2297,9 +2297,9 @@ $(KDIR)xtrsm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_
|
||||||
$(KDIR)sgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMM_BETA)
|
$(KDIR)sgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMM_BETA)
|
||||||
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
$(KDIR)shgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA)
|
$(KDIR)sbgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA)
|
||||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
endif
|
endif
|
||||||
|
|
||||||
$(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA)
|
$(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA)
|
||||||
|
@ -2318,19 +2318,19 @@ $(KDIR)xgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMM_BETA)
|
||||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@
|
||||||
|
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF), 1)
|
ifeq ($(BUILD_BFLOAT16), 1)
|
||||||
$(SHGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMONCOPY)
|
$(SBGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMONCOPY)
|
||||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
|
||||||
$(SHGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMOTCOPY)
|
$(SBGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMOTCOPY)
|
||||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
|
||||||
ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
|
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
|
||||||
$(SHGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMINCOPY)
|
$(SBGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMINCOPY)
|
||||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
|
||||||
$(SHGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMITCOPY)
|
$(SBGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMITCOPY)
|
||||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
@ -2440,9 +2440,9 @@ endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF), 1)
|
ifeq ($(BUILD_BFLOAT16), 1)
|
||||||
$(KDIR)shgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND)
|
$(KDIR)sbgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND)
|
||||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
endif
|
endif
|
||||||
|
|
||||||
$(KDIR)sgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND)
|
$(KDIR)sgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND)
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#if defined(HALF) && defined(HALFCONVERSION)
|
#if defined(BFLOAT16) && defined(BFLOAT16CONVERSION)
|
||||||
static float
|
static float
|
||||||
bfloat16tof32 (bfloat16 f16)
|
bfloat16tof32 (bfloat16 f16)
|
||||||
{
|
{
|
||||||
|
|
|
@ -7,16 +7,16 @@ else
|
||||||
#CGEMM_BETA = ../generic/zgemm_beta.c
|
#CGEMM_BETA = ../generic/zgemm_beta.c
|
||||||
#ZGEMM_BETA = ../generic/zgemm_beta.c
|
#ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||||
|
|
||||||
SHGEMM_BETA = ../generic/gemm_beta.c
|
SBGEMM_BETA = ../generic/gemm_beta.c
|
||||||
SHGEMMKERNEL = shgemm_kernel_power10.c
|
SBGEMMKERNEL = sbgemm_kernel_power10.c
|
||||||
SHGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
SBGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||||
SHGEMMITCOPY = ../generic/gemm_tcopy_16.c
|
SBGEMMITCOPY = ../generic/gemm_tcopy_16.c
|
||||||
SHGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
SBGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||||
SHGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
SBGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||||
SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX)
|
SBGEMMINCOPYOBJ = sbgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
STRMMKERNEL = sgemm_kernel_power10.c
|
STRMMKERNEL = sgemm_kernel_power10.c
|
||||||
DTRMMKERNEL = dgemm_kernel_power10.c
|
DTRMMKERNEL = dgemm_kernel_power10.c
|
||||||
|
|
|
@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
**********************************************************************************/
|
**********************************************************************************/
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include <altivec.h>
|
#include <altivec.h>
|
||||||
#if defined(HALF) && defined(HALFCONVERSION)
|
#if defined(BFLOAT16) && defined(BFLOAT16CONVERSION)
|
||||||
static float
|
static float
|
||||||
bfloat16tof32 (bfloat16 f16)
|
bfloat16tof32 (bfloat16 f16)
|
||||||
{
|
{
|
||||||
|
@ -131,7 +131,7 @@ vector char mask =
|
||||||
|
|
||||||
#define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "r" (x), "b" (y) : "memory");
|
#define PREFETCH1(x, y) asm volatile ("dcbt %0, %1" : : "r" (x), "b" (y) : "memory");
|
||||||
/*************************************************************************************
|
/*************************************************************************************
|
||||||
* SHGEMM Kernel
|
* SBGEMM Kernel
|
||||||
*************************************************************************************/
|
*************************************************************************************/
|
||||||
int
|
int
|
||||||
CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
|
CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
|
|
@ -53,13 +53,13 @@ gotoblas_t TABLE_NAME = {
|
||||||
|
|
||||||
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
|
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
|
||||||
|
|
||||||
#ifdef BUILD_HALF
|
#ifdef BUILD_BFLOAT16
|
||||||
0, 0, 0,
|
0, 0, 0,
|
||||||
SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
|
SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
|
||||||
#ifdef SHGEMM_DEFAULT_UNROLL_MN
|
#ifdef SBGEMM_DEFAULT_UNROLL_MN
|
||||||
SHGEMM_DEFAULT_UNROLL_MN,
|
SBGEMM_DEFAULT_UNROLL_MN,
|
||||||
#else
|
#else
|
||||||
MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
|
MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
samax_kTS, samin_kTS, smax_kTS, smin_kTS,
|
samax_kTS, samin_kTS, smax_kTS, smin_kTS,
|
||||||
|
@ -70,13 +70,13 @@ gotoblas_t TABLE_NAME = {
|
||||||
sgemv_nTS, sgemv_tTS, sger_kTS,
|
sgemv_nTS, sgemv_tTS, sger_kTS,
|
||||||
ssymv_LTS, ssymv_UTS,
|
ssymv_LTS, ssymv_UTS,
|
||||||
|
|
||||||
shgemm_kernelTS, shgemm_betaTS,
|
sbgemm_kernelTS, sbgemm_betaTS,
|
||||||
#if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
|
#if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
|
||||||
shgemm_incopyTS, shgemm_itcopyTS,
|
sbgemm_incopyTS, sbgemm_itcopyTS,
|
||||||
#else
|
#else
|
||||||
shgemm_oncopyTS, shgemm_otcopyTS,
|
sbgemm_oncopyTS, sbgemm_otcopyTS,
|
||||||
#endif
|
#endif
|
||||||
shgemm_oncopyTS, shgemm_otcopyTS,
|
sbgemm_oncopyTS, sbgemm_otcopyTS,
|
||||||
|
|
||||||
strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
|
strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
|
||||||
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
|
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
|
||||||
|
@ -707,24 +707,24 @@ gotoblas_t TABLE_NAME = {
|
||||||
|
|
||||||
#if defined(ARCH_ARM64)
|
#if defined(ARCH_ARM64)
|
||||||
static void init_parameter(void) {
|
static void init_parameter(void) {
|
||||||
#if defined(BUILD_HALF)
|
#if defined(BUILD_BFLOAT16)
|
||||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
|
||||||
#endif
|
#endif
|
||||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||||
|
|
||||||
#if defined(BUILD_HALF)
|
#if defined(BUILD_BFLOAT16)
|
||||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
|
||||||
#endif
|
#endif
|
||||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
|
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
|
||||||
|
|
||||||
#if defined(BUILD_HALF)
|
#if defined(BUILD_BFLOAT16)
|
||||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
|
||||||
#endif
|
#endif
|
||||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
||||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
||||||
|
@ -789,16 +789,16 @@ static void init_parameter(void) {
|
||||||
#if defined(ARCH_POWER)
|
#if defined(ARCH_POWER)
|
||||||
static void init_parameter(void) {
|
static void init_parameter(void) {
|
||||||
|
|
||||||
#ifdef BUILD_HALF
|
#ifdef BUILD_BFLOAT16
|
||||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
|
||||||
#endif
|
#endif
|
||||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||||
|
|
||||||
#ifdef BUILD_HALF
|
#ifdef BUILD_BFLOAT16
|
||||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
|
||||||
#endif
|
#endif
|
||||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
||||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
||||||
|
@ -806,8 +806,8 @@ static void init_parameter(void) {
|
||||||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
||||||
|
|
||||||
|
|
||||||
#ifdef BUILD_HALF
|
#ifdef BUILD_BFLOAT16
|
||||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
|
||||||
#endif
|
#endif
|
||||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||||
|
@ -818,16 +818,16 @@ static void init_parameter(void) {
|
||||||
|
|
||||||
#if defined(ARCH_ZARCH)
|
#if defined(ARCH_ZARCH)
|
||||||
static void init_parameter(void) {
|
static void init_parameter(void) {
|
||||||
#ifdef BUILD_HALF
|
#ifdef BUILD_BFLOAT16
|
||||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
|
||||||
#endif
|
#endif
|
||||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||||
|
|
||||||
#ifdef BUILD_HALF
|
#ifdef BUILD_BFLOAT16
|
||||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
|
||||||
#endif
|
#endif
|
||||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
||||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
||||||
|
@ -835,8 +835,8 @@ static void init_parameter(void) {
|
||||||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
||||||
|
|
||||||
|
|
||||||
#ifdef BUILD_HALF
|
#ifdef BUILD_BFLOAT16
|
||||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
|
||||||
#endif
|
#endif
|
||||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||||
|
@ -977,10 +977,10 @@ static void init_parameter(void) {
|
||||||
(void) l2; /* dirty trick to suppress unused variable warning for targets */
|
(void) l2; /* dirty trick to suppress unused variable warning for targets */
|
||||||
/* where the GEMM unrolling parameters do not depend on l2 */
|
/* where the GEMM unrolling parameters do not depend on l2 */
|
||||||
|
|
||||||
#ifdef BUILD_HALF
|
#ifdef BUILD_BFLOAT16
|
||||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
|
||||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
|
||||||
#endif
|
#endif
|
||||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
include_directories(${PROJECT_SOURCE_DIR})
|
include_directories(${PROJECT_SOURCE_DIR})
|
||||||
include_directories(${PROJECT_BINARY_DIR})
|
include_directories(${PROJECT_BINARY_DIR})
|
||||||
|
|
||||||
list (REMOVE_ITEM FLOAT_TYPES "HALF")
|
list (REMOVE_ITEM FLOAT_TYPES "BFLOAT16")
|
||||||
|
|
||||||
set(LAPACK_SOURCES
|
set(LAPACK_SOURCES
|
||||||
potrf/potrf_U_single.c
|
potrf/potrf_U_single.c
|
||||||
|
@ -46,7 +46,7 @@ GenerateNamedObjects("laswp/generic/laswp_k_4.c" "" "laswp_plus" false "" "" fa
|
||||||
GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3)
|
GenerateNamedObjects("laswp/generic/laswp_k_4.c" "MINUS" "laswp_minus" false "" "" false 3)
|
||||||
|
|
||||||
foreach (float_type ${FLOAT_TYPES})
|
foreach (float_type ${FLOAT_TYPES})
|
||||||
if (${float_type} STREQUAL "HALF")
|
if (${float_type} STREQUAL "BFLOAT16")
|
||||||
continue()
|
continue()
|
||||||
endif()
|
endif()
|
||||||
GenerateNamedObjects("getrf/getrf_single.c" "UNIT" "getrf_single" false "" "" false ${float_type})
|
GenerateNamedObjects("getrf/getrf_single.c" "UNIT" "getrf_single" false "" "" false ${float_type})
|
||||||
|
|
|
@ -380,9 +380,9 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){
|
||||||
#elif defined(DOUBLE)
|
#elif defined(DOUBLE)
|
||||||
mode = BLAS_DOUBLE | BLAS_REAL;
|
mode = BLAS_DOUBLE | BLAS_REAL;
|
||||||
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1;
|
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1;
|
||||||
#elif defined(HALF)
|
#elif defined(BFLOAT16)
|
||||||
mode = BLAS_HALF | BLAS_REAL;
|
mode = BLAS_BFLOAT16 | BLAS_REAL;
|
||||||
mask = MAX(SHGEMM_UNROLL_M, SHGEMM_UNROLL_N) - 1;
|
mask = MAX(SBGEMM_UNROLL_M, SBGEMM_UNROLL_N) - 1;
|
||||||
#else
|
#else
|
||||||
mode = BLAS_SINGLE | BLAS_REAL;
|
mode = BLAS_SINGLE | BLAS_REAL;
|
||||||
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1;
|
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1;
|
||||||
|
|
32
param.h
32
param.h
|
@ -72,12 +72,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#ifndef PARAM_H
|
#ifndef PARAM_H
|
||||||
#define PARAM_H
|
#define PARAM_H
|
||||||
|
|
||||||
#define SHGEMM_DEFAULT_UNROLL_N 4
|
#define SBGEMM_DEFAULT_UNROLL_N 4
|
||||||
#define SHGEMM_DEFAULT_UNROLL_M 8
|
#define SBGEMM_DEFAULT_UNROLL_M 8
|
||||||
#define SHGEMM_DEFAULT_UNROLL_MN 32
|
#define SBGEMM_DEFAULT_UNROLL_MN 32
|
||||||
#define SHGEMM_DEFAULT_P 256
|
#define SBGEMM_DEFAULT_P 256
|
||||||
#define SHGEMM_DEFAULT_R 256
|
#define SBGEMM_DEFAULT_R 256
|
||||||
#define SHGEMM_DEFAULT_Q 256
|
#define SBGEMM_DEFAULT_Q 256
|
||||||
#ifdef OPTERON
|
#ifdef OPTERON
|
||||||
|
|
||||||
#define SNUMOPT 4
|
#define SNUMOPT 4
|
||||||
|
@ -2308,16 +2308,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(POWER10)
|
#if defined(POWER10)
|
||||||
#undef SHGEMM_DEFAULT_UNROLL_N
|
#undef SBGEMM_DEFAULT_UNROLL_N
|
||||||
#undef SHGEMM_DEFAULT_UNROLL_M
|
#undef SBGEMM_DEFAULT_UNROLL_M
|
||||||
#undef SHGEMM_DEFAULT_P
|
#undef SBGEMM_DEFAULT_P
|
||||||
#undef SHGEMM_DEFAULT_R
|
#undef SBGEMM_DEFAULT_R
|
||||||
#undef SHGEMM_DEFAULT_Q
|
#undef SBGEMM_DEFAULT_Q
|
||||||
#define SHGEMM_DEFAULT_UNROLL_M 16
|
#define SBGEMM_DEFAULT_UNROLL_M 16
|
||||||
#define SHGEMM_DEFAULT_UNROLL_N 8
|
#define SBGEMM_DEFAULT_UNROLL_N 8
|
||||||
#define SHGEMM_DEFAULT_P 832
|
#define SBGEMM_DEFAULT_P 832
|
||||||
#define SHGEMM_DEFAULT_Q 1026
|
#define SBGEMM_DEFAULT_Q 1026
|
||||||
#define SHGEMM_DEFAULT_R 4096
|
#define SBGEMM_DEFAULT_R 4096
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(SPARC) && defined(V7)
|
#if defined(SPARC) && defined(V7)
|
||||||
|
|
|
@ -64,15 +64,15 @@ endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
level3 : test_shgemm sblat3 dblat3 cblat3 zblat3
|
level3 : test_sbgemm sblat3 dblat3 cblat3 zblat3
|
||||||
else
|
else
|
||||||
level3 : sblat3 dblat3 cblat3 zblat3
|
level3 : sblat3 dblat3 cblat3 zblat3
|
||||||
endif
|
endif
|
||||||
ifndef CROSS
|
ifndef CROSS
|
||||||
rm -f ?BLAT3.SUMM
|
rm -f ?BLAT3.SUMM
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_shgemm > SHBLAT3.SUMM
|
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_sbgemm > SHBLAT3.SUMM
|
||||||
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0
|
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0
|
||||||
endif
|
endif
|
||||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat3 < ./sblat3.dat
|
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat3 < ./sblat3.dat
|
||||||
|
@ -86,8 +86,8 @@ endif
|
||||||
ifdef SMP
|
ifdef SMP
|
||||||
rm -f ?BLAT3.SUMM
|
rm -f ?BLAT3.SUMM
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
OMP_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM
|
OMP_NUM_THREADS=2 ./test_sbgemm > SHBLAT3.SUMM
|
||||||
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0
|
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0
|
||||||
endif
|
endif
|
||||||
OMP_NUM_THREADS=2 ./sblat3 < ./sblat3.dat
|
OMP_NUM_THREADS=2 ./sblat3 < ./sblat3.dat
|
||||||
|
@ -99,8 +99,8 @@ endif
|
||||||
OMP_NUM_THREADS=2 ./zblat3 < ./zblat3.dat
|
OMP_NUM_THREADS=2 ./zblat3 < ./zblat3.dat
|
||||||
@$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0
|
@$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0
|
||||||
else
|
else
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
OPENBLAS_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM
|
OPENBLAS_NUM_THREADS=2 ./test_sbgemm > SHBLAT3.SUMM
|
||||||
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0
|
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0
|
||||||
endif
|
endif
|
||||||
OPENBLAS_NUM_THREADS=2 ./sblat3 < ./sblat3.dat
|
OPENBLAS_NUM_THREADS=2 ./sblat3 < ./sblat3.dat
|
||||||
|
@ -181,9 +181,9 @@ zblat2 : zblat2.$(SUFFIX) ../$(LIBNAME)
|
||||||
sblat3 : sblat3.$(SUFFIX) ../$(LIBNAME)
|
sblat3 : sblat3.$(SUFFIX) ../$(LIBNAME)
|
||||||
$(FC) $(FLDFLAGS) -o sblat3 sblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
|
$(FC) $(FLDFLAGS) -o sblat3 sblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
|
||||||
|
|
||||||
ifeq ($(BUILD_HALF),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
test_shgemm : compare_sgemm_shgemm.c ../$(LIBNAME)
|
test_sbgemm : compare_sgemm_sbgemm.c ../$(LIBNAME)
|
||||||
$(FC) $(FLDFLAGS) -o test_shgemm compare_sgemm_shgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
|
$(FC) $(FLDFLAGS) -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
dblat3 : dblat3.$(SUFFIX) ../$(LIBNAME)
|
dblat3 : dblat3.$(SUFFIX) ../$(LIBNAME)
|
||||||
|
@ -208,7 +208,7 @@ clean:
|
||||||
@rm -f *.$(SUFFIX) *.$(PSUFFIX) gmon.$(SUFFIX)ut *.SUMM *.cxml *.exe *.pdb *.dwf \
|
@rm -f *.$(SUFFIX) *.$(PSUFFIX) gmon.$(SUFFIX)ut *.SUMM *.cxml *.exe *.pdb *.dwf \
|
||||||
sblat1 dblat1 cblat1 zblat1 \
|
sblat1 dblat1 cblat1 zblat1 \
|
||||||
sblat2 dblat2 cblat2 zblat2 \
|
sblat2 dblat2 cblat2 zblat2 \
|
||||||
test_shgemm sblat3 dblat3 cblat3 zblat3 \
|
test_sbgemm sblat3 dblat3 cblat3 zblat3 \
|
||||||
sblat1p dblat1p cblat1p zblat1p \
|
sblat1p dblat1p cblat1p zblat1p \
|
||||||
sblat2p dblat2p cblat2p zblat2p \
|
sblat2p dblat2p cblat2p zblat2p \
|
||||||
sblat3p dblat3p cblat3p zblat3p \
|
sblat3p dblat3p cblat3p zblat3p \
|
||||||
|
|
|
@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include "../common.h"
|
#include "../common.h"
|
||||||
#define SGEMM BLASFUNC(sgemm)
|
#define SGEMM BLASFUNC(sgemm)
|
||||||
#define SHGEMM BLASFUNC(shgemm)
|
#define SBGEMM BLASFUNC(sbgemm)
|
||||||
typedef union
|
typedef union
|
||||||
{
|
{
|
||||||
unsigned short v;
|
unsigned short v;
|
||||||
|
@ -102,7 +102,7 @@ main (int argc, char *argv[])
|
||||||
}
|
}
|
||||||
SGEMM (&transA, &transB, &m, &n, &k, &alpha, A,
|
SGEMM (&transA, &transB, &m, &n, &k, &alpha, A,
|
||||||
&m, B, &k, &beta, C, &m);
|
&m, B, &k, &beta, C, &m);
|
||||||
SHGEMM (&transA, &transB, &m, &n, &k, &alpha, AA,
|
SBGEMM (&transA, &transB, &m, &n, &k, &alpha, AA,
|
||||||
&m, BB, &k, &beta, CC, &m);
|
&m, BB, &k, &beta, CC, &m);
|
||||||
for (i = 0; i < n; i++)
|
for (i = 0; i < n; i++)
|
||||||
for (j = 0; j < m; j++)
|
for (j = 0; j < m; j++)
|
||||||
|
@ -126,6 +126,6 @@ main (int argc, char *argv[])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (ret != 0)
|
if (ret != 0)
|
||||||
fprintf (stderr, "FATAL ERROR SHGEMM - Return code: %d\n", ret);
|
fprintf (stderr, "FATAL ERROR SBGEMM - Return code: %d\n", ret);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
Loading…
Reference in New Issue