Make building the bfloat16 functions conditional on option BUILD_HALF (#2590)
* Make building the bfloat16 BLAS functions conditional on BUILD_HALF
* Pass the BUILD_HALF option to gensymbol
* Pass BUILD_HALF as a compiler define for DYNAMIC_ARCH builds
This commit is contained in:
parent
a54e35e780
commit
5dd14e3d48
|
@ -86,10 +86,13 @@ if (NOT NO_LAPACK)
|
||||||
list(APPEND SUBDIRS lapack)
|
list(APPEND SUBDIRS lapack)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (NOT DEFINED BUILD_HALF)
|
||||||
|
set (BUILD_HALF false)
|
||||||
|
endif ()
|
||||||
# set which float types we want to build for
|
# set which float types we want to build for
|
||||||
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
|
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
|
||||||
# if none are defined, build for all
|
# if none are defined, build for all
|
||||||
set(BUILD_HALF true)
|
# set(BUILD_HALF true)
|
||||||
set(BUILD_SINGLE true)
|
set(BUILD_SINGLE true)
|
||||||
set(BUILD_DOUBLE true)
|
set(BUILD_DOUBLE true)
|
||||||
set(BUILD_COMPLEX true)
|
set(BUILD_COMPLEX true)
|
||||||
|
@ -121,7 +124,7 @@ if (BUILD_COMPLEX16)
|
||||||
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
|
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (BUILD_SINGLE OR BUILD_HALF)
|
if (BUILD_HALF)
|
||||||
message(STATUS "Building Half Precision")
|
message(STATUS "Building Half Precision")
|
||||||
list(APPEND FLOAT_TYPES "HALF") # defines nothing
|
list(APPEND FLOAT_TYPES "HALF") # defines nothing
|
||||||
endif ()
|
endif ()
|
||||||
|
|
|
@ -273,6 +273,9 @@ COMMON_PROF = -pg
|
||||||
#
|
#
|
||||||
# CPP_THREAD_SAFETY_TEST = 1
|
# CPP_THREAD_SAFETY_TEST = 1
|
||||||
|
|
||||||
|
|
||||||
|
# If you want to enable the experimental BFLOAT16 support
|
||||||
|
# BUILD_HALF = 1
|
||||||
#
|
#
|
||||||
# End of user configuration
|
# End of user configuration
|
||||||
#
|
#
|
||||||
|
|
|
@ -1124,6 +1124,10 @@ ifeq ($(USE_TLS), 1)
|
||||||
CCOMMON_OPT += -DUSE_TLS
|
CCOMMON_OPT += -DUSE_TLS
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF), 1)
|
||||||
|
CCOMMON_OPT += -DBUILD_HALF
|
||||||
|
endif
|
||||||
|
|
||||||
CCOMMON_OPT += -DVERSION=\"$(VERSION)\"
|
CCOMMON_OPT += -DVERSION=\"$(VERSION)\"
|
||||||
|
|
||||||
ifndef SYMBOLPREFIX
|
ifndef SYMBOLPREFIX
|
||||||
|
@ -1395,6 +1399,7 @@ export KERNELDIR
|
||||||
export FUNCTION_PROFILE
|
export FUNCTION_PROFILE
|
||||||
export TARGET_CORE
|
export TARGET_CORE
|
||||||
export NO_AVX512
|
export NO_AVX512
|
||||||
|
export BUILD_HALF
|
||||||
|
|
||||||
export SHGEMM_UNROLL_M
|
export SHGEMM_UNROLL_M
|
||||||
export SHGEMM_UNROLL_N
|
export SHGEMM_UNROLL_N
|
||||||
|
|
|
@ -113,6 +113,7 @@ macro(SetDefaultL1)
|
||||||
set(ZSUMKERNEL zsum.S)
|
set(ZSUMKERNEL zsum.S)
|
||||||
set(QSUMKERNEL sum.S)
|
set(QSUMKERNEL sum.S)
|
||||||
set(XSUMKERNEL zsum.S)
|
set(XSUMKERNEL zsum.S)
|
||||||
|
if (BUILD_HALF)
|
||||||
set(SHAMINKERNEL ../arm/amin.c)
|
set(SHAMINKERNEL ../arm/amin.c)
|
||||||
set(SHAMAXKERNEL ../arm/amax.c)
|
set(SHAMAXKERNEL ../arm/amax.c)
|
||||||
set(SHMAXKERNEL ../arm/max.c)
|
set(SHMAXKERNEL ../arm/max.c)
|
||||||
|
@ -131,6 +132,7 @@ macro(SetDefaultL1)
|
||||||
set(SHNRM2KERNEL ../arm/nrm2.c)
|
set(SHNRM2KERNEL ../arm/nrm2.c)
|
||||||
set(SHSUMKERNEL ../arm/sum.c)
|
set(SHSUMKERNEL ../arm/sum.c)
|
||||||
set(SHSWAPKERNEL ../arm/swap.c)
|
set(SHSWAPKERNEL ../arm/swap.c)
|
||||||
|
endif ()
|
||||||
endmacro ()
|
endmacro ()
|
||||||
|
|
||||||
macro(SetDefaultL2)
|
macro(SetDefaultL2)
|
||||||
|
@ -179,10 +181,11 @@ macro(SetDefaultL2)
|
||||||
set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
|
set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
|
||||||
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
|
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
|
||||||
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
|
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
|
||||||
|
if (BUILD_HALF)
|
||||||
set(SHGEMVNKERNEL ../arm/gemv_n.c)
|
set(SHGEMVNKERNEL ../arm/gemv_n.c)
|
||||||
set(SHGEMVTKERNEL ../arm/gemv_t.c)
|
set(SHGEMVTKERNEL ../arm/gemv_t.c)
|
||||||
set(SHGERKERNEL ../generic/ger.c)
|
set(SHGERKERNEL ../generic/ger.c)
|
||||||
|
endif ()
|
||||||
endmacro ()
|
endmacro ()
|
||||||
|
|
||||||
macro(SetDefaultL3)
|
macro(SetDefaultL3)
|
||||||
|
@ -190,6 +193,7 @@ macro(SetDefaultL3)
|
||||||
set(DGEADD_KERNEL ../generic/geadd.c)
|
set(DGEADD_KERNEL ../generic/geadd.c)
|
||||||
set(CGEADD_KERNEL ../generic/zgeadd.c)
|
set(CGEADD_KERNEL ../generic/zgeadd.c)
|
||||||
set(ZGEADD_KERNEL ../generic/zgeadd.c)
|
set(ZGEADD_KERNEL ../generic/zgeadd.c)
|
||||||
|
if (BUILD_HALF)
|
||||||
set(SHGEADD_KERNEL ../generic/geadd.c)
|
set(SHGEADD_KERNEL ../generic/geadd.c)
|
||||||
set(SHGEMMKERNEL ../generic/gemmkernel_2x2.c)
|
set(SHGEMMKERNEL ../generic/gemmkernel_2x2.c)
|
||||||
set(SHGEMM_BETA ../generic/gemm_beta.c)
|
set(SHGEMM_BETA ../generic/gemm_beta.c)
|
||||||
|
@ -201,6 +205,6 @@ macro(SetDefaultL3)
|
||||||
set(SHGEMMITCOPYOBJ shgemm_itcopy.o)
|
set(SHGEMMITCOPYOBJ shgemm_itcopy.o)
|
||||||
set(SHGEMMONCOPYOBJ shgemm_oncopy.o)
|
set(SHGEMMONCOPYOBJ shgemm_oncopy.o)
|
||||||
set(SHGEMMOTCOPYOBJ shgemm_otcopy.o)
|
set(SHGEMMOTCOPYOBJ shgemm_otcopy.o)
|
||||||
|
endif ()
|
||||||
|
|
||||||
endmacro ()
|
endmacro ()
|
||||||
|
|
|
@ -47,7 +47,7 @@ typedef struct {
|
||||||
int dtb_entries;
|
int dtb_entries;
|
||||||
int offsetA, offsetB, align;
|
int offsetA, offsetB, align;
|
||||||
|
|
||||||
#if 1
|
#ifdef BUILD_HALF
|
||||||
int shgemm_p, shgemm_q, shgemm_r;
|
int shgemm_p, shgemm_q, shgemm_r;
|
||||||
int shgemm_unroll_m, shgemm_unroll_n, shgemm_unroll_mn;
|
int shgemm_unroll_m, shgemm_unroll_n, shgemm_unroll_mn;
|
||||||
|
|
||||||
|
@ -1002,12 +1002,14 @@ extern gotoblas_t *gotoblas;
|
||||||
|
|
||||||
#define HAVE_EX_L2 gotoblas -> exclusive_cache
|
#define HAVE_EX_L2 gotoblas -> exclusive_cache
|
||||||
|
|
||||||
|
#ifdef BUILD_HALF
|
||||||
#define SHGEMM_P gotoblas -> shgemm_p
|
#define SHGEMM_P gotoblas -> shgemm_p
|
||||||
#define SHGEMM_Q gotoblas -> shgemm_q
|
#define SHGEMM_Q gotoblas -> shgemm_q
|
||||||
#define SHGEMM_R gotoblas -> shgemm_r
|
#define SHGEMM_R gotoblas -> shgemm_r
|
||||||
#define SHGEMM_UNROLL_M gotoblas -> shgemm_unroll_m
|
#define SHGEMM_UNROLL_M gotoblas -> shgemm_unroll_m
|
||||||
#define SHGEMM_UNROLL_N gotoblas -> shgemm_unroll_n
|
#define SHGEMM_UNROLL_N gotoblas -> shgemm_unroll_n
|
||||||
#define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn
|
#define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn
|
||||||
|
#endif
|
||||||
|
|
||||||
#define SGEMM_P gotoblas -> sgemm_p
|
#define SGEMM_P gotoblas -> sgemm_p
|
||||||
#define SGEMM_Q gotoblas -> sgemm_q
|
#define SGEMM_Q gotoblas -> sgemm_q
|
||||||
|
@ -1086,6 +1088,7 @@ extern gotoblas_t *gotoblas;
|
||||||
#define HAVE_EX_L2 0
|
#define HAVE_EX_L2 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef BUILD_HALF
|
||||||
#define SHGEMM_P SHGEMM_DEFAULT_P
|
#define SHGEMM_P SHGEMM_DEFAULT_P
|
||||||
#define SHGEMM_Q SHGEMM_DEFAULT_Q
|
#define SHGEMM_Q SHGEMM_DEFAULT_Q
|
||||||
#define SHGEMM_R SHGEMM_DEFAULT_R
|
#define SHGEMM_R SHGEMM_DEFAULT_R
|
||||||
|
@ -1096,6 +1099,7 @@ extern gotoblas_t *gotoblas;
|
||||||
#else
|
#else
|
||||||
#define SHGEMM_UNROLL_MN MAX((SHGEMM_UNROLL_M), (SHGEMM_UNROLL_N))
|
#define SHGEMM_UNROLL_MN MAX((SHGEMM_UNROLL_M), (SHGEMM_UNROLL_N))
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#define SGEMM_P SGEMM_DEFAULT_P
|
#define SGEMM_P SGEMM_DEFAULT_P
|
||||||
#define SGEMM_Q SGEMM_DEFAULT_Q
|
#define SGEMM_Q SGEMM_DEFAULT_Q
|
||||||
|
@ -1330,31 +1334,31 @@ extern gotoblas_t *gotoblas;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SHGEMM_DEFAULT_R
|
#ifndef SHGEMM_DEFAULT_R
|
||||||
#define SHGEMM_DEFAULT_R (((BUFFER_SIZE - ((SHGEMM_DEFAULT_P * SHGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SHGEMM_DEFAULT_Q * 4) - 15) & ~15)
|
#define SHGEMM_DEFAULT_R (((BUFFER_SIZE - ((SHGEMM_DEFAULT_P * SHGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SHGEMM_DEFAULT_Q * 4) - 15) & ~15UL)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SGEMM_DEFAULT_R
|
#ifndef SGEMM_DEFAULT_R
|
||||||
#define SGEMM_DEFAULT_R (((BUFFER_SIZE - ((SGEMM_DEFAULT_P * SGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SGEMM_DEFAULT_Q * 4) - 15) & ~15)
|
#define SGEMM_DEFAULT_R (((BUFFER_SIZE - ((SGEMM_DEFAULT_P * SGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SGEMM_DEFAULT_Q * 4) - 15) & ~15UL)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef DGEMM_DEFAULT_R
|
#ifndef DGEMM_DEFAULT_R
|
||||||
#define DGEMM_DEFAULT_R (((BUFFER_SIZE - ((DGEMM_DEFAULT_P * DGEMM_DEFAULT_Q * 8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (DGEMM_DEFAULT_Q * 8) - 15) & ~15)
|
#define DGEMM_DEFAULT_R (((BUFFER_SIZE - ((DGEMM_DEFAULT_P * DGEMM_DEFAULT_Q * 8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (DGEMM_DEFAULT_Q * 8) - 15) & ~15UL)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef QGEMM_DEFAULT_R
|
#ifndef QGEMM_DEFAULT_R
|
||||||
#define QGEMM_DEFAULT_R (((BUFFER_SIZE - ((QGEMM_DEFAULT_P * QGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (QGEMM_DEFAULT_Q * 16) - 15) & ~15)
|
#define QGEMM_DEFAULT_R (((BUFFER_SIZE - ((QGEMM_DEFAULT_P * QGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (QGEMM_DEFAULT_Q * 16) - 15) & ~15UL)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef CGEMM_DEFAULT_R
|
#ifndef CGEMM_DEFAULT_R
|
||||||
#define CGEMM_DEFAULT_R (((BUFFER_SIZE - ((CGEMM_DEFAULT_P * CGEMM_DEFAULT_Q * 8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (CGEMM_DEFAULT_Q * 8) - 15) & ~15)
|
#define CGEMM_DEFAULT_R (((BUFFER_SIZE - ((CGEMM_DEFAULT_P * CGEMM_DEFAULT_Q * 8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (CGEMM_DEFAULT_Q * 8) - 15) & ~15UL)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef ZGEMM_DEFAULT_R
|
#ifndef ZGEMM_DEFAULT_R
|
||||||
#define ZGEMM_DEFAULT_R (((BUFFER_SIZE - ((ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (ZGEMM_DEFAULT_Q * 16) - 15) & ~15)
|
#define ZGEMM_DEFAULT_R (((BUFFER_SIZE - ((ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (ZGEMM_DEFAULT_Q * 16) - 15) & ~15UL)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef XGEMM_DEFAULT_R
|
#ifndef XGEMM_DEFAULT_R
|
||||||
#define XGEMM_DEFAULT_R (((BUFFER_SIZE - ((XGEMM_DEFAULT_P * XGEMM_DEFAULT_Q * 32 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (XGEMM_DEFAULT_Q * 32) - 15) & ~15)
|
#define XGEMM_DEFAULT_R (((BUFFER_SIZE - ((XGEMM_DEFAULT_P * XGEMM_DEFAULT_Q * 32 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (XGEMM_DEFAULT_Q * 32) - 15) & ~15UL)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SNUMOPT
|
#ifndef SNUMOPT
|
||||||
|
|
|
@ -19,7 +19,10 @@ ifeq ($(ARCH), MIPS)
|
||||||
USE_GEMM3M = 1
|
USE_GEMM3M = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF),1)
|
||||||
SHBLASOBJS += shgemm_nn.$(SUFFIX) shgemm_nt.$(SUFFIX) shgemm_tn.$(SUFFIX) shgemm_tt.$(SUFFIX)
|
SHBLASOBJS += shgemm_nn.$(SUFFIX) shgemm_nt.$(SUFFIX) shgemm_tn.$(SUFFIX) shgemm_tt.$(SUFFIX)
|
||||||
|
endif
|
||||||
|
|
||||||
SBLASOBJS += \
|
SBLASOBJS += \
|
||||||
sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX) \
|
sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX) \
|
||||||
strmm_LNUU.$(SUFFIX) strmm_LNUN.$(SUFFIX) strmm_LNLU.$(SUFFIX) strmm_LNLN.$(SUFFIX) \
|
strmm_LNUU.$(SUFFIX) strmm_LNUN.$(SUFFIX) strmm_LNLU.$(SUFFIX) strmm_LNLN.$(SUFFIX) \
|
||||||
|
@ -204,8 +207,9 @@ COMMONOBJS += gemm_thread_m.$(SUFFIX) gemm_thread_n.$(SUFFIX) gemm_thread_mn.$(
|
||||||
COMMONOBJS += syrk_thread.$(SUFFIX)
|
COMMONOBJS += syrk_thread.$(SUFFIX)
|
||||||
|
|
||||||
ifndef USE_SIMPLE_THREADED_LEVEL3
|
ifndef USE_SIMPLE_THREADED_LEVEL3
|
||||||
|
ifeq ($(BUILD_HALF),1)
|
||||||
SHBLASOBJS += shgemm_thread_nn.$(SUFFIX) shgemm_thread_nt.$(SUFFIX) shgemm_thread_tn.$(SUFFIX) shgemm_thread_tt.$(SUFFIX)
|
SHBLASOBJS += shgemm_thread_nn.$(SUFFIX) shgemm_thread_nt.$(SUFFIX) shgemm_thread_tn.$(SUFFIX) shgemm_thread_tt.$(SUFFIX)
|
||||||
|
endif
|
||||||
SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX)
|
SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX)
|
||||||
DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX)
|
DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX)
|
||||||
QBLASOBJS += qgemm_thread_nn.$(SUFFIX) qgemm_thread_nt.$(SUFFIX) qgemm_thread_tn.$(SUFFIX) qgemm_thread_tt.$(SUFFIX)
|
QBLASOBJS += qgemm_thread_nn.$(SUFFIX) qgemm_thread_nt.$(SUFFIX) qgemm_thread_tn.$(SUFFIX) qgemm_thread_tt.$(SUFFIX)
|
||||||
|
|
|
@ -30,6 +30,10 @@ ifndef BUILD_LAPACK_DEPRECATED
|
||||||
BUILD_LAPACK_DEPRECATED = 0
|
BUILD_LAPACK_DEPRECATED = 0
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifndef BUILD_HALF
|
||||||
|
BUILD_HALF = 0
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), WINNT)
|
ifeq ($(OSNAME), WINNT)
|
||||||
ifeq ($(F_COMPILER), GFORTRAN)
|
ifeq ($(F_COMPILER), GFORTRAN)
|
||||||
ifndef ONLY_CBLAS
|
ifndef ONLY_CBLAS
|
||||||
|
@ -234,23 +238,23 @@ static : ../$(LIBNAME)
|
||||||
rm -f goto.$(SUFFIX)
|
rm -f goto.$(SUFFIX)
|
||||||
|
|
||||||
osx.def : gensymbol ../Makefile.system ../getarch.c
|
osx.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
|
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
|
||||||
|
|
||||||
aix.def : gensymbol ../Makefile.system ../getarch.c
|
aix.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
|
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
|
||||||
|
|
||||||
objcopy.def : gensymbol ../Makefile.system ../getarch.c
|
objcopy.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
|
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
|
||||||
|
|
||||||
objconv.def : gensymbol ../Makefile.system ../getarch.c
|
objconv.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
|
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
|
||||||
|
|
||||||
test : linktest.c
|
test : linktest.c
|
||||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
||||||
rm -f linktest
|
rm -f linktest
|
||||||
|
|
||||||
linktest.c : gensymbol ../Makefile.system ../getarch.c
|
linktest.c : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > linktest.c
|
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > linktest.c
|
||||||
|
|
||||||
clean ::
|
clean ::
|
||||||
@rm -f *.def *.dylib __.SYMDEF* *.renamed
|
@rm -f *.def *.dylib __.SYMDEF* *.renamed
|
||||||
|
|
|
@ -30,7 +30,7 @@
|
||||||
icamax,icamin,idamax,idamin,idmax,idmin,isamax,isamin,ismax,ismin,
|
icamax,icamin,idamax,idamin,idmax,idmin,isamax,isamin,ismax,ismin,
|
||||||
izamax,izamin,lsame,samax,samin,sasum,saxpy,scabs1,scamax,
|
izamax,izamin,lsame,samax,samin,sasum,saxpy,scabs1,scamax,
|
||||||
scamin,scasum,scnrm2,scopy,sdot,sdsdot,sgbmv,sgemm,sgemv,sger,
|
scamin,scasum,scnrm2,scopy,sdot,sdsdot,sgbmv,sgemm,sgemv,sger,
|
||||||
shgemm, smax,smin,snrm2,
|
smax,smin,snrm2,
|
||||||
srot,srotg,srotm,srotmg,ssbmv,sscal,sspmv,sspr2,sspr,sswap,
|
srot,srotg,srotm,srotmg,ssbmv,sscal,sspmv,sspr2,sspr,sswap,
|
||||||
ssymm,ssymv,ssyr2,ssyr2k,ssyr,ssyrk,stbmv,stbsv,stpmv,stpsv,
|
ssymm,ssymv,ssyr2,ssyr2k,ssyr,ssyrk,stbmv,stbsv,stpmv,stpsv,
|
||||||
strmm,strmv,strsm,strsv,zaxpy,zcopy,zdotc,zdotu,zdrot,
|
strmm,strmv,strsm,strsv,zaxpy,zcopy,zdotc,zdotu,zdrot,
|
||||||
|
@ -51,6 +51,7 @@
|
||||||
zimatcopy,
|
zimatcopy,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@halfblasobjs = (shgemm);
|
||||||
@cblasobjs = (
|
@cblasobjs = (
|
||||||
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
|
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
|
||||||
cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k,
|
cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k,
|
||||||
|
@ -67,7 +68,7 @@
|
||||||
cblas_isamax, cblas_izamax,
|
cblas_isamax, cblas_izamax,
|
||||||
cblas_sasum, cblas_saxpy,
|
cblas_sasum, cblas_saxpy,
|
||||||
cblas_scasum, cblas_scnrm2, cblas_scopy, cblas_sdot, cblas_sdsdot, cblas_sgbmv, cblas_sgemm,
|
cblas_scasum, cblas_scnrm2, cblas_scopy, cblas_sdot, cblas_sdsdot, cblas_sgbmv, cblas_sgemm,
|
||||||
cblas_sgemv, cblas_sger, cblas_shgemm, cblas_snrm2, cblas_srot, cblas_srotg,
|
cblas_sgemv, cblas_sger, cblas_snrm2, cblas_srot, cblas_srotg,
|
||||||
cblas_srotm, cblas_srotmg, cblas_ssbmv, cblas_sscal, cblas_sspmv, cblas_sspr2, cblas_sspr,
|
cblas_srotm, cblas_srotmg, cblas_ssbmv, cblas_sscal, cblas_sspmv, cblas_sspr2, cblas_sspr,
|
||||||
cblas_sswap, cblas_ssymm, cblas_ssymv, cblas_ssyr2, cblas_ssyr2k, cblas_ssyr, cblas_ssyrk,
|
cblas_sswap, cblas_ssymm, cblas_ssymv, cblas_ssyr2, cblas_ssyr2k, cblas_ssyr, cblas_ssyrk,
|
||||||
cblas_stbmv, cblas_stbsv, cblas_stpmv, cblas_stpsv, cblas_strmm, cblas_strmv, cblas_strsm,
|
cblas_stbmv, cblas_stbsv, cblas_stpmv, cblas_stpsv, cblas_strmm, cblas_strmv, cblas_strsm,
|
||||||
|
@ -83,6 +84,8 @@
|
||||||
cblas_sgeadd, cblas_dgeadd,cblas_cgeadd, cblas_zgeadd
|
cblas_sgeadd, cblas_dgeadd,cblas_cgeadd, cblas_zgeadd
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@halfcblasobjs = (cblas_shgemm);
|
||||||
|
|
||||||
@exblasobjs = (
|
@exblasobjs = (
|
||||||
qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm,
|
qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm,
|
||||||
qgemv,qger,qmax,qmin,
|
qgemv,qger,qmax,qmin,
|
||||||
|
@ -3454,6 +3457,10 @@ use File::Spec;
|
||||||
use File::Basename;
|
use File::Basename;
|
||||||
my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib");
|
my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib");
|
||||||
|
|
||||||
|
if ($ARGV[12] == 1) {
|
||||||
|
@blasobjs = (@blasobjs, @halfblasobjs);
|
||||||
|
@cblasobjs = (@cblasobjs, @halfcblasobjs);
|
||||||
|
}
|
||||||
if ($ARGV[8] == 1) {
|
if ($ARGV[8] == 1) {
|
||||||
#ONLY_CBLAS=1
|
#ONLY_CBLAS=1
|
||||||
@underscore_objs = (@misc_underscore_objs);
|
@underscore_objs = (@misc_underscore_objs);
|
||||||
|
|
|
@ -46,7 +46,9 @@ SBLAS3OBJS = \
|
||||||
somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\
|
somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\
|
||||||
sgeadd.$(SUFFIX)
|
sgeadd.$(SUFFIX)
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF),1)
|
||||||
SHBLAS3OBJS = shgemm.$(SUFFIX)
|
SHBLAS3OBJS = shgemm.$(SUFFIX)
|
||||||
|
endif
|
||||||
|
|
||||||
DBLAS1OBJS = \
|
DBLAS1OBJS = \
|
||||||
daxpy.$(SUFFIX) dswap.$(SUFFIX) \
|
daxpy.$(SUFFIX) dswap.$(SUFFIX) \
|
||||||
|
@ -278,7 +280,9 @@ CSBLAS3OBJS = \
|
||||||
cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\
|
cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\
|
||||||
cblas_sgeadd.$(SUFFIX)
|
cblas_sgeadd.$(SUFFIX)
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF),1)
|
||||||
CSHBLAS3OBJS = cblas_shgemm.$(SUFFIX)
|
CSHBLAS3OBJS = cblas_shgemm.$(SUFFIX)
|
||||||
|
endif
|
||||||
|
|
||||||
CDBLAS1OBJS = \
|
CDBLAS1OBJS = \
|
||||||
cblas_idamax.$(SUFFIX) cblas_idamin.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \
|
cblas_idamax.$(SUFFIX) cblas_idamin.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \
|
||||||
|
@ -1214,8 +1218,10 @@ zhpr2.$(SUFFIX) zhpr2.$(PSUFFIX) : zhpr2.c
|
||||||
xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c
|
xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF),1)
|
||||||
shgemm.$(SUFFIX) shgemm.$(PSUFFIX) : gemm.c ../param.h
|
shgemm.$(SUFFIX) shgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
endif
|
||||||
|
|
||||||
sgemm.$(SUFFIX) sgemm.$(PSUFFIX) : gemm.c ../param.h
|
sgemm.$(SUFFIX) sgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
@ -1778,8 +1784,10 @@ cblas_zhemv.$(SUFFIX) cblas_zhemv.$(PSUFFIX) : zhemv.c
|
||||||
cblas_sgemm.$(SUFFIX) cblas_sgemm.$(PSUFFIX) : gemm.c ../param.h
|
cblas_sgemm.$(SUFFIX) cblas_sgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF),1)
|
||||||
cblas_shgemm.$(SUFFIX) cblas_shgemm.$(PSUFFIX) : gemm.c ../param.h
|
cblas_shgemm.$(SUFFIX) cblas_shgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||||
|
endif
|
||||||
|
|
||||||
cblas_dgemm.$(SUFFIX) cblas_dgemm.$(PSUFFIX) : gemm.c ../param.h
|
cblas_dgemm.$(SUFFIX) cblas_dgemm.$(PSUFFIX) : gemm.c ../param.h
|
||||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
|
@ -137,7 +137,11 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
||||||
foreach (float_type SINGLE DOUBLE HALF)
|
foreach (float_type SINGLE DOUBLE HALF)
|
||||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||||
if (${float_type} STREQUAL "HALF")
|
if (${float_type} STREQUAL "HALF")
|
||||||
set (float_char "SH")
|
if (NOT ${BUILD_HALF})
|
||||||
|
continue ()
|
||||||
|
else ()
|
||||||
|
set (float_char "SH")
|
||||||
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})
|
||||||
endforeach()
|
endforeach()
|
||||||
|
|
|
@ -59,7 +59,8 @@ ifeq ($(CORE), Z14)
|
||||||
USE_TRMM = 1
|
USE_TRMM = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
#ifndef SHGEMMKERNEL
|
ifeq ($(BUILD_HALF), 1)
|
||||||
|
ifndef SHGEMMKERNEL
|
||||||
SHGEMM_BETA = ../generic/gemm_beta.c
|
SHGEMM_BETA = ../generic/gemm_beta.c
|
||||||
SHGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
SHGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
SHGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
SHGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
||||||
|
@ -70,12 +71,13 @@ SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
#endif
|
endif
|
||||||
|
|
||||||
SHKERNELOBJS += \
|
SHKERNELOBJS += \
|
||||||
shgemm_kernel$(TSUFFIX).$(SUFFIX) \
|
shgemm_kernel$(TSUFFIX).$(SUFFIX) \
|
||||||
$(SHGEMMINCOPYOBJ) $(SHGEMMITCOPYOBJ) \
|
$(SHGEMMINCOPYOBJ) $(SHGEMMITCOPYOBJ) \
|
||||||
$(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ)
|
$(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ)
|
||||||
|
endif
|
||||||
|
|
||||||
SKERNELOBJS += \
|
SKERNELOBJS += \
|
||||||
sgemm_kernel$(TSUFFIX).$(SUFFIX) \
|
sgemm_kernel$(TSUFFIX).$(SUFFIX) \
|
||||||
|
@ -110,7 +112,9 @@ XKERNELOBJS += \
|
||||||
$(XGEMMINCOPYOBJ) $(XGEMMITCOPYOBJ) \
|
$(XGEMMINCOPYOBJ) $(XGEMMITCOPYOBJ) \
|
||||||
$(XGEMMONCOPYOBJ) $(XGEMMOTCOPYOBJ)
|
$(XGEMMONCOPYOBJ) $(XGEMMOTCOPYOBJ)
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF),1)
|
||||||
SHBLASOBJS += $(SHKERNELOBJS)
|
SHBLASOBJS += $(SHKERNELOBJS)
|
||||||
|
endif
|
||||||
SBLASOBJS += $(SKERNELOBJS)
|
SBLASOBJS += $(SKERNELOBJS)
|
||||||
DBLASOBJS += $(DKERNELOBJS)
|
DBLASOBJS += $(DKERNELOBJS)
|
||||||
QBLASOBJS += $(QKERNELOBJS)
|
QBLASOBJS += $(QKERNELOBJS)
|
||||||
|
@ -118,7 +122,10 @@ CBLASOBJS += $(CKERNELOBJS)
|
||||||
ZBLASOBJS += $(ZKERNELOBJS)
|
ZBLASOBJS += $(ZKERNELOBJS)
|
||||||
XBLASOBJS += $(XKERNELOBJS)
|
XBLASOBJS += $(XKERNELOBJS)
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF),1)
|
||||||
SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX)
|
SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX)
|
||||||
|
endif
|
||||||
|
|
||||||
SBLASOBJS += \
|
SBLASOBJS += \
|
||||||
sgemm_beta$(TSUFFIX).$(SUFFIX) \
|
sgemm_beta$(TSUFFIX).$(SUFFIX) \
|
||||||
strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
|
strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
|
||||||
|
@ -408,11 +415,13 @@ ZBLASOBJS += \
|
||||||
zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \
|
zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \
|
||||||
zgeadd_k$(TSUFFIX).$(SUFFIX)
|
zgeadd_k$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF), 1)
|
||||||
SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
SHGEMMITCOPYOBJ_P = $(SHGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
SHGEMMITCOPYOBJ_P = $(SHGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
SHGEMMONCOPYOBJ_P = $(SHGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
SHGEMMONCOPYOBJ_P = $(SHGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
SHGEMMOTCOPYOBJ_P = $(SHGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
SHGEMMOTCOPYOBJ_P = $(SHGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
|
endif
|
||||||
|
|
||||||
SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
SGEMMITCOPYOBJ_P = $(SGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
SGEMMITCOPYOBJ_P = $(SGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
SGEMMONCOPYOBJ_P = $(SGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
SGEMMONCOPYOBJ_P = $(SGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
|
@ -438,8 +447,10 @@ XGEMMITCOPYOBJ_P = $(XGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
XGEMMONCOPYOBJ_P = $(XGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
XGEMMONCOPYOBJ_P = $(XGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF),1)
|
||||||
$(KDIR)shgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA)
|
$(KDIR)shgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA)
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
endif
|
||||||
|
|
||||||
$(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA)
|
$(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA)
|
||||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
@ -459,10 +470,14 @@ $(KDIR)zgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMM_BETA)
|
||||||
$(KDIR)xgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMM_BETA)
|
$(KDIR)xgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMM_BETA)
|
||||||
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@
|
||||||
|
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF), 1)
|
||||||
|
|
||||||
$(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY)
|
$(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY)
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
|
||||||
$(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY)
|
$(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY)
|
||||||
|
|
||||||
ifeq ($(OS), AIX)
|
ifeq ($(OS), AIX)
|
||||||
$(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmotcopy.s
|
$(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemmotcopy.s
|
||||||
m4 shgemmotcopy.s > shgemmotcopy_nomacros.s
|
m4 shgemmotcopy.s > shgemmotcopy_nomacros.s
|
||||||
|
@ -487,6 +502,7 @@ else
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
$(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY)
|
$(KDIR)$(SGEMMONCOPYOBJ) : $(KERNELDIR)/$(SGEMMONCOPY)
|
||||||
|
@ -646,6 +662,8 @@ else
|
||||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF), 1)
|
||||||
|
|
||||||
$(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND)
|
$(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND)
|
||||||
ifeq ($(OS), AIX)
|
ifeq ($(OS), AIX)
|
||||||
$(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemm_kernel$(TSUFFIX).s
|
$(CC) $(CFLAGS) -E -DHALF -UDOUBLE -UCOMPLEX $< -o shgemm_kernel$(TSUFFIX).s
|
||||||
|
@ -655,6 +673,7 @@ ifeq ($(OS), AIX)
|
||||||
else
|
else
|
||||||
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
$(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND)
|
$(KDIR)dgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEMMKERNEL) $(DGEMMDEPEND)
|
||||||
ifeq ($(OS), AIX)
|
ifeq ($(OS), AIX)
|
||||||
|
@ -2272,8 +2291,10 @@ $(KDIR)xtrsm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_
|
||||||
$(KDIR)sgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMM_BETA)
|
$(KDIR)sgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMM_BETA)
|
||||||
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF),1)
|
||||||
$(KDIR)shgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA)
|
$(KDIR)shgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA)
|
||||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
endif
|
||||||
|
|
||||||
$(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA)
|
$(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA)
|
||||||
$(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
@ -2290,6 +2311,8 @@ $(KDIR)zgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMM_BETA)
|
||||||
$(KDIR)xgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMM_BETA)
|
$(KDIR)xgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMM_BETA)
|
||||||
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@
|
||||||
|
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF), 1)
|
||||||
$(SHGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMONCOPY)
|
$(SHGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMONCOPY)
|
||||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
|
||||||
|
@ -2304,6 +2327,8 @@ $(SHGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMITCOPY)
|
||||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
$(SGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SGEMMONCOPY)
|
$(SGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SGEMMONCOPY)
|
||||||
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
|
||||||
|
@ -2408,8 +2433,11 @@ endif
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
ifeq ($(BUILD_HALF), 1)
|
||||||
$(KDIR)shgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND)
|
$(KDIR)shgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND)
|
||||||
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
endif
|
||||||
|
|
||||||
$(KDIR)sgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND)
|
$(KDIR)sgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND)
|
||||||
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
|
||||||
|
|
|
@ -53,6 +53,7 @@ gotoblas_t TABLE_NAME = {
|
||||||
|
|
||||||
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
|
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,
|
||||||
|
|
||||||
|
#ifdef BUILD_HALF
|
||||||
0, 0, 0,
|
0, 0, 0,
|
||||||
SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
|
SHGEMM_DEFAULT_UNROLL_M, SHGEMM_DEFAULT_UNROLL_N,
|
||||||
#ifdef SHGEMM_DEFAULT_UNROLL_MN
|
#ifdef SHGEMM_DEFAULT_UNROLL_MN
|
||||||
|
@ -109,7 +110,7 @@ gotoblas_t TABLE_NAME = {
|
||||||
#else
|
#else
|
||||||
NULL,NULL,
|
NULL,NULL,
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
0, 0, 0,
|
0, 0, 0,
|
||||||
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
|
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
|
||||||
|
@ -706,19 +707,25 @@ gotoblas_t TABLE_NAME = {
|
||||||
|
|
||||||
#if defined(ARCH_ARM64)
|
#if defined(ARCH_ARM64)
|
||||||
static void init_parameter(void) {
|
static void init_parameter(void) {
|
||||||
|
#if defined(BUILD_HALF)
|
||||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
||||||
|
#endif
|
||||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||||
|
|
||||||
|
#if defined(BUILD_HALF)
|
||||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
||||||
|
#endif
|
||||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
|
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
|
||||||
|
|
||||||
|
#if defined(BUILD_HALF)
|
||||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
||||||
|
#endif
|
||||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
||||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
||||||
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
|
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
|
||||||
|
@ -782,20 +789,26 @@ static void init_parameter(void) {
|
||||||
#if defined(ARCH_POWER)
|
#if defined(ARCH_POWER)
|
||||||
static void init_parameter(void) {
|
static void init_parameter(void) {
|
||||||
|
|
||||||
|
#ifdef BUILD_HALF
|
||||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
||||||
|
#endif
|
||||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||||
|
|
||||||
|
#ifdef BUILD_HALF
|
||||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
||||||
|
#endif
|
||||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
||||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
||||||
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
|
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
|
||||||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef BUILD_HALF
|
||||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
||||||
|
#endif
|
||||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
||||||
|
@ -805,20 +818,26 @@ static void init_parameter(void) {
|
||||||
|
|
||||||
#if defined(ARCH_ZARCH)
|
#if defined(ARCH_ZARCH)
|
||||||
static void init_parameter(void) {
|
static void init_parameter(void) {
|
||||||
|
#ifdef BUILD_HALF
|
||||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
||||||
|
#endif
|
||||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||||
|
|
||||||
|
#ifdef BUILD_HALF
|
||||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
||||||
|
#endif
|
||||||
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
||||||
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
||||||
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
|
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
|
||||||
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef BUILD_HALF
|
||||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
||||||
|
#endif
|
||||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
||||||
|
@ -958,9 +977,11 @@ static void init_parameter(void) {
|
||||||
(void) l2; /* dirty trick to suppress unused variable warning for targets */
|
(void) l2; /* dirty trick to suppress unused variable warning for targets */
|
||||||
/* where the GEMM unrolling parameters do not depend on l2 */
|
/* where the GEMM unrolling parameters do not depend on l2 */
|
||||||
|
|
||||||
|
#ifdef BUILD_HALF
|
||||||
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
|
||||||
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
|
||||||
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
|
||||||
|
#endif
|
||||||
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||||
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
||||||
|
|
Loading…
Reference in New Issue