From e05af6575ee9fa12f2afea8c2c20e80b1529ba84 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 13 Oct 2020 09:05:04 +0200 Subject: [PATCH 1/6] Fix some overlooked "SHBLAS" entries --- Makefile.tail | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Makefile.tail b/Makefile.tail index b14689fc7..54ba649db 100644 --- a/Makefile.tail +++ b/Makefile.tail @@ -1,18 +1,18 @@ -SHBLASOBJS_P = $(SHBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) +SBBLASOBJS_P = $(SBBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) DBLASOBJS_P = $(DBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) -SHEXTOBJS_P = $(SHEXTOBJS:.$(SUFFIX)=.$(PSUFFIX)) +SBEXTOBJS_P = $(SBEXTOBJS:.$(SUFFIX)=.$(PSUFFIX)) COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX)) HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX)) -BLASOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS) -BLASOBJS_P = $(SHEXTOBJS_P) $(SHBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P) +BLASOBJS = $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS) +BLASOBJS_P = $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P) ifdef EXPRECISION BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) @@ -24,23 +24,23 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P) endif -$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX +$(SBBLASOBJS) $(SBBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX $(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX $(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX $(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX 
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX $(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX $(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX -$(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX +$(SBEXTOBJS) $(SBEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX -$(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) +$(SBBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) $(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) $(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) $(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) $(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) $(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) -$(SHEXTOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) +$(SBEXTOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) libs :: $(BLASOBJS) $(COMMONOBJS) $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ From 2ae87856039e78cf736fb22efb9bc8020697cbe3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 13 Oct 2020 09:07:50 +0200 Subject: [PATCH 2/6] Add a POWER9 build with BFLOAT16 enabled --- .travis.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.travis.yml b/.travis.yml index 4bfdf485c..3f917ce72 100644 --- a/.travis.yml +++ b/.travis.yml @@ -104,6 +104,23 @@ matrix: # for matrix annotation only - TARGET_BOX=PPC64LE_LINUX_P9 + - os: linux + arch: ppc64le + dist: bionic + compiler: gcc + before_script: + - sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y + - sudo apt-get update + - sudo apt-get install gcc-9 gfortran-9 -y + script: + - make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 + - make -C test $COMMON_FLAGS $BTYPE + - make -C ctest $COMMON_FLAGS $BTYPE + - make -C utest $COMMON_FLAGS $BTYPE + env: + # for matrix annotation only + - 
TARGET_BOX=PPC64LE_LINUX_P9 + - os: linux compiler: gcc addons: From 84949754a0d62fe70beb8d36285328eb446a5dcd Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 13 Oct 2020 09:11:36 +0200 Subject: [PATCH 3/6] Fix bfloat16 conditional --- common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common.h b/common.h index 89eeb197d..a3ef99b59 100644 --- a/common.h +++ b/common.h @@ -257,7 +257,7 @@ typedef long BLASLONG; typedef unsigned long BLASULONG; #endif -#ifndef BFLOAT16 +#ifndef bfloat16 #include <stdint.h> typedef uint16_t bfloat16; #define BFLOAT16CONVERSION 1 From 1e7eb7b7a91838ccba39b9183fb0a5a814c09b7b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 13 Oct 2020 09:17:15 +0200 Subject: [PATCH 4/6] Fix typos in currently unused sections --- interface/Makefile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/interface/Makefile b/interface/Makefile index a35d53270..1905827f9 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -283,9 +283,9 @@ CSBLAS3OBJS = \ cblas_sgeadd.$(SUFFIX) ifeq ($(BUILD_BFLOAT16),1) -CBHBLAS1OBJS = cblas_sbdot.$(SUFFIX) -CBHBLAS3OBJS = cblas_sbgemm.$(SUFFIX) -CBHEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX) +CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX) +CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) +CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX) endif CDBLAS1OBJS = \ @@ -535,19 +535,19 @@ endif clean :: @rm -f functable.h -level1 : $(BEXTOBJS) $(SHBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) +level1 : $(SBEXTOBJS) $(SBBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ -level3 : 
$(SHBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) +level3 : $(SBBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ aux : $(CBAUXOBJS) $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ -$(CSHBLASOBJS) $(CSHBLASOBJS_P) $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ +$(CSBBLASOBJS) $(CSBBLASOBJS_P) $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ $(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) $(CBAUXOBJS_P) : override CFLAGS += -DCBLAS srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c From 9dca578c79aec1e736f9fbb233489de85703928d Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 13 Oct 2020 10:14:08 +0200 Subject: [PATCH 5/6] Cleanup From b5d30b390dd8d6aed4617c94e5b4fd94425c96d1 Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Tue, 13 Oct 2020 11:00:22 -0500 Subject: [PATCH 6/6] Fix build issues with bfloat16 This patch fixes compilation errors due to recent renaming from SH to SB with BUILD_BFLOAT16. 
--- cblas.h | 4 ++-- common_interface.h | 4 ++-- common_level1.h | 4 ++-- common_macro.h | 4 ++-- driver/level3/Makefile | 4 ++-- exports/gensymbol | 4 ++-- interface/Makefile | 8 ++++---- kernel/Makefile.L1 | 6 +++--- kernel/Makefile.L3 | 6 +++--- test/Makefile | 8 +++----- 10 files changed, 25 insertions(+), 27 deletions(-) diff --git a/cblas.h b/cblas.h index 4fc6f8681..bf310bed2 100644 --- a/cblas.h +++ b/cblas.h @@ -384,9 +384,9 @@ void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint /*** BFLOAT16 and INT8 extensions ***/ /* convert float array to BFLOAT16 array by rounding */ -void cblas_shstobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout); +void cblas_sbstobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout); /* convert double array to BFLOAT16 array by rounding */ -void cblas_shdtobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout); +void cblas_sbdtobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *in, OPENBLAS_CONST blasint incin, bfloat16 *out, OPENBLAS_CONST blasint incout); /* convert BFLOAT16 array to float array */ void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, float *out, OPENBLAS_CONST blasint incout); /* convert BFLOAT16 array to double array */ diff --git a/common_interface.h b/common_interface.h index bee09e894..032877fe1 100644 --- a/common_interface.h +++ b/common_interface.h @@ -55,8 +55,8 @@ double BLASFUNC(ddot) (blasint *, double *, blasint *, double *, blasint *); xdouble BLASFUNC(qdot) (blasint *, xdouble *, blasint *, xdouble *, blasint *); float BLASFUNC(sbdot) (blasint *, bfloat16 *, blasint *, bfloat16 *, blasint *); -void BLASFUNC(shstobf16) (blasint *, float *, blasint *, bfloat16 *, blasint *); -void 
BLASFUNC(shdtobf16) (blasint *, double *, blasint *, bfloat16 *, blasint *); +void BLASFUNC(sbstobf16) (blasint *, float *, blasint *, bfloat16 *, blasint *); +void BLASFUNC(sbdtobf16) (blasint *, double *, blasint *, bfloat16 *, blasint *); void BLASFUNC(sbf16tos) (blasint *, bfloat16 *, blasint *, float *, blasint *); void BLASFUNC(dbf16tod) (blasint *, bfloat16 *, blasint *, double *, blasint *); diff --git a/common_level1.h b/common_level1.h index 7b17962c4..d2ed47e56 100644 --- a/common_level1.h +++ b/common_level1.h @@ -48,8 +48,8 @@ double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG); xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); float sbdot_k(BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG); -void shstobf16_k(BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG); -void shdtobf16_k(BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG); +void sbstobf16_k(BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG); +void sbdtobf16_k(BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG); void sbf16tos_k (BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG); void dbf16tod_k (BLASLONG, bfloat16 *, BLASLONG, double *, BLASLONG); diff --git a/common_macro.h b/common_macro.h index 510813b0f..54deed57c 100644 --- a/common_macro.h +++ b/common_macro.h @@ -646,9 +646,9 @@ #elif defined(BFLOAT16) -#define D_TO_BF16_K SHDTOBF16_K +#define D_TO_BF16_K SBDTOBF16_K #define D_BF16_TO_K DBF16TOD_K -#define S_TO_BF16_K SHSTOBF16_K +#define S_TO_BF16_K SBSTOBF16_K #define S_BF16_TO_K SBF16TOS_K #define AMAX_K SAMAX_K diff --git a/driver/level3/Makefile b/driver/level3/Makefile index b4f1e2b26..b528dfa2d 100644 --- a/driver/level3/Makefile +++ b/driver/level3/Makefile @@ -20,7 +20,7 @@ USE_GEMM3M = 1 endif ifeq ($(BUILD_BFLOAT16),1) -SHBLASOBJS += sbgemm_nn.$(SUFFIX) sbgemm_nt.$(SUFFIX) sbgemm_tn.$(SUFFIX) sbgemm_tt.$(SUFFIX) +SBBLASOBJS += sbgemm_nn.$(SUFFIX) sbgemm_nt.$(SUFFIX) sbgemm_tn.$(SUFFIX) sbgemm_tt.$(SUFFIX) endif SBLASOBJS += \ @@ -208,7 +208,7 
@@ COMMONOBJS += syrk_thread.$(SUFFIX) ifndef USE_SIMPLE_THREADED_LEVEL3 ifeq ($(BUILD_BFLOAT16),1) -SHBLASOBJS += sbgemm_thread_nn.$(SUFFIX) sbgemm_thread_nt.$(SUFFIX) sbgemm_thread_tn.$(SUFFIX) sbgemm_thread_tt.$(SUFFIX) +SBBLASOBJS += sbgemm_thread_nn.$(SUFFIX) sbgemm_thread_nt.$(SUFFIX) sbgemm_thread_tn.$(SUFFIX) sbgemm_thread_tt.$(SUFFIX) endif SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX) DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) diff --git a/exports/gensymbol b/exports/gensymbol index 9ff8e10b1..8482ecb7e 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -51,7 +51,7 @@ zgeadd, dzsum); @cblasobjs = (lsame, xerbla); -@halfblasobjs = (sbgemm, sbdot, shstobf16, shdtobf16, sbf16tos, dbf16tod); +@halfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod); @cblasobjsc = ( cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv, cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k, @@ -94,7 +94,7 @@ @cblasobjs = ( cblas_xerbla ); -@halfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_shstobf16, cblas_shdtobf16, cblas_sbf16tos, cblas_dbf16tod); +@halfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod); @exblasobjs = ( qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm, diff --git a/interface/Makefile b/interface/Makefile index 1905827f9..6b247b49f 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -775,9 +775,9 @@ dsdot.$(SUFFIX) dsdot.$(PSUFFIX) : dsdot.c ifeq ($(BUILD_BFLOAT16),1) sbdot.$(SUFFIX) sbdot.$(PSUFFIX) : bf16dot.c $(CC) $(CFLAGS) -c $< -o $(@F) -shstobf16.$(SUFFIX) shstobf16.$(PSUFFIX) : tobf16.c +sbstobf16.$(SUFFIX) sbstobf16.$(PSUFFIX) : tobf16.c $(CC) $(CFLAGS) -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) -shdtobf16.$(SUFFIX) shdtobf16.$(PSUFFIX) : 
tobf16.c +sbdtobf16.$(SUFFIX) sbdtobf16.$(PSUFFIX) : tobf16.c $(CC) $(CFLAGS) -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) sbf16tos.$(SUFFIX) sbf16tos.$(PSUFFIX) : bf16to.c $(CC) $(CFLAGS) -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) @@ -1526,9 +1526,9 @@ cblas_dsdot.$(SUFFIX) cblas_dsdot.$(PSUFFIX) : dsdot.c ifeq ($(BUILD_BFLOAT16),1) cblas_sbdot.$(SUFFIX) cblas_sbdot.$(PSUFFIX) : bf16dot.c $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) -cblas_shstobf16.$(SUFFIX) cblas_shstobf16.$(PSUFFIX) : tobf16.c +cblas_sbstobf16.$(SUFFIX) cblas_sbstobf16.$(PSUFFIX) : tobf16.c $(CC) $(CFLAGS) -DCBLAS -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) -cblas_shdtobf16.$(SUFFIX) cblas_shdtobf16.$(PSUFFIX) : tobf16.c +cblas_sbdtobf16.$(SUFFIX) cblas_sbdtobf16.$(PSUFFIX) : tobf16.c $(CC) $(CFLAGS) -DCBLAS -USINGLE_PREC -DDOUBLE_PREC -c $< -o $(@F) cblas_sbf16tos.$(SUFFIX) cblas_sbf16tos.$(PSUFFIX) : bf16to.c $(CC) $(CFLAGS) -DCBLAS -DSINGLE_PREC -UDOUBLE_PREC -c $< -o $(@F) diff --git a/kernel/Makefile.L1 b/kernel/Makefile.L1 index 6fe6778d0..7ad94118a 100644 --- a/kernel/Makefile.L1 +++ b/kernel/Makefile.L1 @@ -531,11 +531,11 @@ XBLASOBJS += \ xscal_k$(TSUFFIX).$(SUFFIX) xswap_k$(TSUFFIX).$(SUFFIX) xsum_k$(TSUFFIX).$(SUFFIX) ifeq ($(BUILD_BFLOAT16),1) -SHBLASOBJS += \ +SBBLASOBJS += \ sbdot_k$(TSUFFIX).$(SUFFIX) -SHEXTOBJS += \ +SBEXTOBJS += \ sbstobf16_k$(TSUFFIX).$(SUFFIX) sbdtobf16_k$(TSUFFIX).$(SUFFIX) -SHEXTOBJS += \ +SBEXTOBJS += \ sbf16tos_k$(TSUFFIX).$(SUFFIX) dbf16tod_k$(TSUFFIX).$(SUFFIX) endif diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 65d429012..2ba593c2e 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -94,7 +94,7 @@ SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX) SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX) endif -SHKERNELOBJS += \ +SBKERNELOBJS += \ sbgemm_kernel$(TSUFFIX).$(SUFFIX) \ $(SBGEMMINCOPYOBJ) $(SBGEMMITCOPYOBJ) \ $(SBGEMMONCOPYOBJ) $(SBGEMMOTCOPYOBJ) @@ -150,7 +150,7 @@ XKERNELOBJS += \ $(XGEMMONCOPYOBJ) $(XGEMMOTCOPYOBJ) ifeq 
($(BUILD_BFLOAT16),1) -SHBLASOBJS += $(SHKERNELOBJS) +SBBLASOBJS += $(SBKERNELOBJS) endif SBLASOBJS += $(SKERNELOBJS) DBLASOBJS += $(DKERNELOBJS) @@ -160,7 +160,7 @@ ZBLASOBJS += $(ZKERNELOBJS) XBLASOBJS += $(XKERNELOBJS) ifeq ($(BUILD_BFLOAT16),1) -SHBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX) +SBBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX) endif ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" diff --git a/test/Makefile b/test/Makefile index 06fb7fe86..212343389 100644 --- a/test/Makefile +++ b/test/Makefile @@ -214,11 +214,9 @@ endif -#ifeq ($(BUILD_BFLOAT16),1) -#level3 : test_sbgemm sblat3 dblat3 cblat3 zblat3 -#else -#level3 : sblat3 dblat3 cblat3 zblat3 -#endif +ifeq ($(BUILD_BFLOAT16),1) +level3 : test_sbgemm +endif ifndef CROSS rm -f ?BLAT3.SUMM