Change "HALF" and "sh" to "BFLOAT16" and "sb"
This commit is contained in:
parent
756062afa5
commit
3aecafad80
|
@ -41,8 +41,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
|||
foreach (float_type ${FLOAT_TYPES})
|
||||
# a bit of metaprogramming here to pull out the appropriate KERNEL var
|
||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||
if (${float_type} STREQUAL "HALF")
|
||||
set (float_char "SH")
|
||||
if (${float_type} STREQUAL "BFLOAT16")
|
||||
set (float_char "SB")
|
||||
endif ()
|
||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false ${float_type})
|
||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false ${float_type})
|
||||
|
@ -149,8 +149,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
|||
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
|
||||
foreach (float_type ${FLOAT_TYPES})
|
||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||
if (${float_type} STREQUAL "HALF")
|
||||
set (float_char "SH")
|
||||
if (${float_type} STREQUAL "BFLOAT16")
|
||||
set (float_char "SB")
|
||||
endif ()
|
||||
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GERUKERNEL}" "" "geru_k" false "" "" false ${float_type})
|
||||
|
@ -208,13 +208,13 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
|||
GenerateNamedObjects("${KERNELDIR}/${SGEMMDIRECTPERFORMANT}" "" "gemm_direct_performant" false "" "" false SINGLE)
|
||||
endif()
|
||||
|
||||
foreach (float_type SINGLE DOUBLE HALF)
|
||||
foreach (float_type SINGLE DOUBLE BFLOAT16)
|
||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||
if (${float_type} STREQUAL "HALF")
|
||||
if (NOT ${BUILD_HALF})
|
||||
if (${float_type} STREQUAL "BFLOAT16")
|
||||
if (NOT ${BUILD_BFLOAT16})
|
||||
continue ()
|
||||
else ()
|
||||
set (float_char "SH")
|
||||
set (float_char "SB")
|
||||
endif ()
|
||||
endif ()
|
||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})
|
||||
|
@ -254,8 +254,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
|||
|
||||
foreach (float_type ${FLOAT_TYPES})
|
||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||
if (${float_type} STREQUAL "HALF")
|
||||
set (float_char "SH")
|
||||
if (${float_type} STREQUAL "BFLOAT16")
|
||||
set (float_char "SB")
|
||||
endif ()
|
||||
if (${float_char}GEMMINCOPY)
|
||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMINCOPY}" "${float_type}" "${${float_char}GEMMINCOPYOBJ}" false "" "" true ${float_type})
|
||||
|
@ -620,8 +620,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
|||
# Makefile.LA
|
||||
if(NOT NO_LAPACK)
|
||||
foreach (float_type ${FLOAT_TYPES})
|
||||
if (${float_type} STREQUAL "HALF")
|
||||
set (float_char "SH")
|
||||
if (${float_type} STREQUAL "BFLOAT16")
|
||||
set (float_char "SB")
|
||||
endif ()
|
||||
if (NOT DEFINED ${float_char}NEG_TCOPY)
|
||||
if (${float_char} STREQUAL "Z" OR ${float_char} STREQUAL "C" OR ${float_char} STREQUAL "X")
|
||||
|
@ -688,8 +688,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
|||
foreach (float_type ${FLOAT_TYPES})
|
||||
# a bit of metaprogramming here to pull out the appropriate KERNEL var
|
||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||
if (${float_type} STREQUAL "HALF")
|
||||
set (float_char "SH")
|
||||
if (${float_type} STREQUAL "BFLOAT16")
|
||||
set (float_char "SB")
|
||||
endif ()
|
||||
GenerateNamedObjects("generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false ${float_type})
|
||||
GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type})
|
||||
|
|
|
@ -262,9 +262,9 @@ ifndef XDOTKERNEL
|
|||
XDOTKERNEL = zdot.S
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_HALF),1)
|
||||
ifndef SHDOTKERNEL
|
||||
SHDOTKERNEL = ../x86_64/shdot.c
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
ifndef SBDOTKERNEL
|
||||
SBDOTKERNEL = ../x86_64/sbdot.c
|
||||
endif
|
||||
|
||||
ifndef TOBF16KERNEL
|
||||
|
@ -530,11 +530,11 @@ XBLASOBJS += \
|
|||
xdotc_k$(TSUFFIX).$(SUFFIX) xdotu_k$(TSUFFIX).$(SUFFIX) xnrm2_k$(TSUFFIX).$(SUFFIX) xqrot_k$(TSUFFIX).$(SUFFIX) \
|
||||
xscal_k$(TSUFFIX).$(SUFFIX) xswap_k$(TSUFFIX).$(SUFFIX) xsum_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ifeq ($(BUILD_HALF),1)
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
SHBLASOBJS += \
|
||||
shdot_k$(TSUFFIX).$(SUFFIX)
|
||||
sbdot_k$(TSUFFIX).$(SUFFIX)
|
||||
SHEXTOBJS += \
|
||||
shstobf16_k$(TSUFFIX).$(SUFFIX) shdtobf16_k$(TSUFFIX).$(SUFFIX)
|
||||
sbstobf16_k$(TSUFFIX).$(SUFFIX) sbdtobf16_k$(TSUFFIX).$(SUFFIX)
|
||||
SHEXTOBJS += \
|
||||
sbf16tos_k$(TSUFFIX).$(SUFFIX) dbf16tod_k$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
@ -757,12 +757,12 @@ $(KDIR)ddot_k$(TSUFFIX).$(SUFFIX) $(KDIR)ddot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL
|
|||
$(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QDOTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@
|
||||
|
||||
ifeq ($(BUILD_HALF),1)
|
||||
$(KDIR)shdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)shdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHDOTKERNEL)
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
$(KDIR)sbdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sbdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBDOTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@
|
||||
$(KDIR)shstobf16_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TOBF16KERNEL)
|
||||
$(KDIR)sbstobf16_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TOBF16KERNEL)
|
||||
$(CC) -c $(CFLAGS) -UDOUBLE -DSINGLE $< -o $@
|
||||
$(KDIR)shdtobf16_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TOBF16KERNEL)
|
||||
$(KDIR)sbdtobf16_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TOBF16KERNEL)
|
||||
$(CC) -c $(CFLAGS) -DDOUBLE -USINGLE $< -o $@
|
||||
$(KDIR)sbf16tos_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(BF16TOKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UDOUBLE -DSINGLE $< -o $@
|
||||
|
|
|
@ -80,24 +80,24 @@ SGEMMDIRECTPERFORMANT = sgemm_direct_performant.c
|
|||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_HALF), 1)
|
||||
ifndef SHGEMMKERNEL
|
||||
SHGEMM_BETA = ../generic/gemm_beta.c
|
||||
SHGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
SHGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
||||
SHGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
||||
SHGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
SHGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
ifeq ($(BUILD_BFLOAT16), 1)
|
||||
ifndef SBGEMMKERNEL
|
||||
SBGEMM_BETA = ../generic/gemm_beta.c
|
||||
SBGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
SBGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
||||
SBGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
||||
SBGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
SBGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
SBGEMMINCOPYOBJ = sbgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
SHKERNELOBJS += \
|
||||
shgemm_kernel$(TSUFFIX).$(SUFFIX) \
|
||||
$(SHGEMMINCOPYOBJ) $(SHGEMMITCOPYOBJ) \
|
||||
$(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ)
|
||||
sbgemm_kernel$(TSUFFIX).$(SUFFIX) \
|
||||
$(SBGEMMINCOPYOBJ) $(SBGEMMITCOPYOBJ) \
|
||||
$(SBGEMMONCOPYOBJ) $(SBGEMMOTCOPYOBJ)
|
||||
endif
|
||||
|
||||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" ""
|
||||
|
@ -149,7 +149,7 @@ XKERNELOBJS += \
|
|||
$(XGEMMINCOPYOBJ) $(XGEMMITCOPYOBJ) \
|
||||
$(XGEMMONCOPYOBJ) $(XGEMMOTCOPYOBJ)
|
||||
|
||||
ifeq ($(BUILD_HALF),1)
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
SHBLASOBJS += $(SHKERNELOBJS)
|
||||
endif
|
||||
SBLASOBJS += $(SKERNELOBJS)
|
||||
|
@ -159,8 +159,8 @@ CBLASOBJS += $(CKERNELOBJS)
|
|||
ZBLASOBJS += $(ZKERNELOBJS)
|
||||
XBLASOBJS += $(XKERNELOBJS)
|
||||
|
||||
ifeq ($(BUILD_HALF),1)
|
||||
SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX)
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
SHBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
|
||||
|
@ -492,11 +492,11 @@ ZBLASOBJS += \
|
|||
zgeadd_k$(TSUFFIX).$(SUFFIX)
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_HALF), 1)
|
||||
SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
SHGEMMITCOPYOBJ_P = $(SHGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
SHGEMMONCOPYOBJ_P = $(SHGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
SHGEMMOTCOPYOBJ_P = $(SHGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
ifeq ($(BUILD_BFLOAT16), 1)
|
||||
SBGEMMINCOPYOBJ_P = $(SBGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
SBGEMMITCOPYOBJ_P = $(SBGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
SBGEMMONCOPYOBJ_P = $(SBGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
SBGEMMOTCOPYOBJ_P = $(SBGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
endif
|
||||
|
||||
SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
@ -524,9 +524,9 @@ XGEMMITCOPYOBJ_P = $(XGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
|||
XGEMMONCOPYOBJ_P = $(XGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
ifeq ($(BUILD_HALF),1)
|
||||
$(KDIR)shgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA)
|
||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
$(KDIR)sbgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA)
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
endif
|
||||
|
||||
$(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA)
|
||||
|
@ -548,35 +548,35 @@ $(KDIR)xgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMM_BETA)
|
|||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@
|
||||
|
||||
|
||||
ifeq ($(BUILD_HALF), 1)
|
||||
ifeq ($(BUILD_BFLOAT16), 1)
|
||||
|
||||
$(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY)
|
||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||
$(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY)
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
|
||||
$(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY)
|
||||
$(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY)
|
||||
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemmotcopy.s
|
||||
m4 shgemmotcopy.s > shgemmotcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmotcopy_nomacros.s -o $@
|
||||
rm shgemmotcopy.s shgemmotcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmotcopy.s
|
||||
m4 sbgemmotcopy.s > sbgemmotcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmotcopy_nomacros.s -o $@
|
||||
rm sbgemmotcopy.s sbgemmotcopy_nomacros.s
|
||||
else
|
||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
endif
|
||||
|
||||
ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
|
||||
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
|
||||
|
||||
$(KDIR)$(SHGEMMINCOPYOBJ) : $(KERNELDIR)/$(SHGEMMINCOPY)
|
||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||
$(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY)
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
|
||||
$(KDIR)$(SHGEMMITCOPYOBJ) : $(KERNELDIR)/$(SHGEMMITCOPY)
|
||||
$(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemmitcopy.s
|
||||
m4 shgemmitcopy.s > shgemmitcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmitcopy_nomacros.s -o $@
|
||||
rm shgemmitcopy.s shgemmitcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmitcopy.s
|
||||
m4 sbgemmitcopy.s > sbgemmitcopy_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmitcopy_nomacros.s -o $@
|
||||
rm sbgemmitcopy.s sbgemmitcopy_nomacros.s
|
||||
else
|
||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
endif
|
||||
|
||||
endif
|
||||
|
@ -746,16 +746,16 @@ $(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL)
|
|||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_HALF), 1)
|
||||
ifeq ($(BUILD_BFLOAT16), 1)
|
||||
|
||||
$(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND)
|
||||
$(KDIR)sbgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND)
|
||||
ifeq ($(OS), AIX)
|
||||
$(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemm_kernel$(TSUFFIX).s
|
||||
m4 shgemm_kernel$(TSUFFIX).s > shgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemm_kernel$(TSUFFIX)_nomacros.s -o $@
|
||||
rm shgemm_kernel$(TSUFFIX).s shgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
$(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemm_kernel$(TSUFFIX).s
|
||||
m4 sbgemm_kernel$(TSUFFIX).s > sbgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemm_kernel$(TSUFFIX)_nomacros.s -o $@
|
||||
rm sbgemm_kernel$(TSUFFIX).s sbgemm_kernel$(TSUFFIX)_nomacros.s
|
||||
else
|
||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||
$(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
endif
|
||||
endif
|
||||
|
||||
|
@ -2375,9 +2375,9 @@ $(KDIR)xtrsm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_
|
|||
$(KDIR)sgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMM_BETA)
|
||||
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
||||
|
||||
ifeq ($(BUILD_HALF),1)
|
||||
$(KDIR)shgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA)
|
||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
$(KDIR)sbgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA)
|
||||
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
endif
|
||||
|
||||
$(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA)
|
||||
|
@ -2396,19 +2396,19 @@ $(KDIR)xgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMM_BETA)
|
|||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@
|
||||
|
||||
|
||||
ifeq ($(BUILD_HALF), 1)
|
||||
$(SHGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMONCOPY)
|
||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||
ifeq ($(BUILD_BFLOAT16), 1)
|
||||
$(SBGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMONCOPY)
|
||||
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
|
||||
$(SHGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMOTCOPY)
|
||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||
$(SBGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMOTCOPY)
|
||||
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
|
||||
ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
|
||||
$(SHGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMINCOPY)
|
||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
|
||||
$(SBGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMINCOPY)
|
||||
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
|
||||
$(SHGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMITCOPY)
|
||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||
$(SBGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMITCOPY)
|
||||
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
|
||||
endif
|
||||
endif
|
||||
|
@ -2518,9 +2518,9 @@ endif
|
|||
endif
|
||||
|
||||
|
||||
ifeq ($(BUILD_HALF), 1)
|
||||
$(KDIR)shgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND)
|
||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||
ifeq ($(BUILD_BFLOAT16), 1)
|
||||
$(KDIR)sbgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND)
|
||||
$(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
|
||||
endif
|
||||
|
||||
$(KDIR)sgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND)
|
||||
|
|
|
@ -53,32 +53,32 @@ gotoblas_t TABLE_NAME = {
|
|||
|
||||
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
|
||||
|
||||
#ifdef BUILD_HALF
|
||||
#ifdef BUILD_BFLOAT16
|
||||
0, 0, 0,
|
||||
SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
|
||||
#ifdef SHGEMM_DEFAULT_UNROLL_MN
|
||||
SHGEMM_DEFAULT_UNROLL_MN,
|
||||
SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N,
|
||||
#ifdef SBGEMM_DEFAULT_UNROLL_MN
|
||||
SBGEMM_DEFAULT_UNROLL_MN,
|
||||
#else
|
||||
MAX(SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N),
|
||||
MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
|
||||
#endif
|
||||
|
||||
shstobf16_kTS, shdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
|
||||
sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
|
||||
|
||||
samax_kTS, samin_kTS, smax_kTS, smin_kTS,
|
||||
isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
|
||||
snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, shdot_kTS,
|
||||
snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
|
||||
dsdot_kTS,
|
||||
srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
|
||||
sgemv_nTS, sgemv_tTS, sger_kTS,
|
||||
ssymv_LTS, ssymv_UTS,
|
||||
|
||||
shgemm_kernelTS, shgemm_betaTS,
|
||||
#if SHGEMM_DEFAULT_UNROLL_M != SHGEMM_DEFAULT_UNROLL_N
|
||||
shgemm_incopyTS, shgemm_itcopyTS,
|
||||
sbgemm_kernelTS, sbgemm_betaTS,
|
||||
#if SBGEMM_DEFAULT_UNROLL_M != SBGEMM_DEFAULT_UNROLL_N
|
||||
sbgemm_incopyTS, sbgemm_itcopyTS,
|
||||
#else
|
||||
shgemm_oncopyTS, shgemm_otcopyTS,
|
||||
sbgemm_oncopyTS, sbgemm_otcopyTS,
|
||||
#endif
|
||||
shgemm_oncopyTS, shgemm_otcopyTS,
|
||||
sbgemm_oncopyTS, sbgemm_otcopyTS,
|
||||
|
||||
strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
|
||||
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
|
||||
|
@ -830,8 +830,8 @@ gotoblas_t TABLE_NAME = {
|
|||
|
||||
#if (ARCH_ARM64)
|
||||
static void init_parameter(void) {
|
||||
#if (BUILD_HALF)
|
||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
||||
#if (BUILD_BFLOAT16)
|
||||
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
|
||||
#endif
|
||||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
|
||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||
|
@ -846,8 +846,8 @@ static void init_parameter(void) {
|
|||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||
#endif
|
||||
|
||||
#if (BUILD_HALF)
|
||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
||||
#if (BUILD_BFLOAT16)
|
||||
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
|
||||
#endif
|
||||
#if BUILD_SINGLE == 1
|
||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||
|
@ -862,8 +862,8 @@ static void init_parameter(void) {
|
|||
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
|
||||
#endif
|
||||
|
||||
#if (BUILD_HALF)
|
||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
||||
#if (BUILD_BFLOAT16)
|
||||
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
|
||||
#endif
|
||||
#if BUILD_SINGLE == 1
|
||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
||||
|
@ -936,16 +936,16 @@ static void init_parameter(void) {
|
|||
#if (ARCH_POWER)
|
||||
static void init_parameter(void) {
|
||||
|
||||
#ifdef BUILD_HALF
|
||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
||||
#ifdef BUILD_BFLOAT16
|
||||
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
|
||||
#endif
|
||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||
|
||||
#ifdef BUILD_HALF
|
||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
||||
#ifdef BUILD_BFLOAT16
|
||||
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
|
||||
#endif
|
||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
||||
|
@ -953,8 +953,8 @@ static void init_parameter(void) {
|
|||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
||||
|
||||
|
||||
#ifdef BUILD_HALF
|
||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
||||
#ifdef BUILD_BFLOAT16
|
||||
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
|
||||
#endif
|
||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||
|
@ -965,16 +965,16 @@ static void init_parameter(void) {
|
|||
|
||||
#if (ARCH_ZARCH)
|
||||
static void init_parameter(void) {
|
||||
#ifdef BUILD_HALF
|
||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
||||
#ifdef BUILD_BFLOAT16
|
||||
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
|
||||
#endif
|
||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||
|
||||
#ifdef BUILD_HALF
|
||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
||||
#ifdef BUILD_BFLOAT16
|
||||
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
|
||||
#endif
|
||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
||||
|
@ -982,8 +982,8 @@ static void init_parameter(void) {
|
|||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
||||
|
||||
|
||||
#ifdef BUILD_HALF
|
||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
||||
#ifdef BUILD_BFLOAT16
|
||||
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
|
||||
#endif
|
||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||
|
@ -1124,10 +1124,10 @@ static void init_parameter(void) {
|
|||
(void) l2; /* dirty trick to suppress unused variable warning for targets */
|
||||
/* where the GEMM unrolling parameters do not depend on l2 */
|
||||
|
||||
#ifdef BUILD_HALF
|
||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
||||
#ifdef BUILD_BFLOAT16
|
||||
TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
|
||||
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
|
||||
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
|
||||
#endif
|
||||
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
|
||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||
|
|
Loading…
Reference in New Issue