From 281e834566a06f1c756d262dc31e809faaf8933f Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Thu, 30 Mar 2023 15:15:25 -0400 Subject: [PATCH 01/14] do not pass -j flag to the MAKE variable --- getarch.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/getarch.c b/getarch.c index 937a8db68..87384c084 100644 --- a/getarch.c +++ b/getarch.c @@ -1930,15 +1930,15 @@ printf("ELF_VERSION=2\n"); #ifdef MAKE_NB_JOBS #if MAKE_NB_JOBS > 0 - printf("MAKE += -j %d\n", MAKE_NB_JOBS); + printf("MAKEFLAGS += -j %d\n", MAKE_NB_JOBS); #else // Let make use parent -j argument or -j1 if there // is no make parent #endif #elif NO_PARALLEL_MAKE==1 - printf("MAKE += -j 1\n"); + printf("MAKEFLAGS += -j 1\n"); #else - printf("MAKE += -j %d\n", get_num_cores()); + printf("MAKEFLAGS += -j %d\n", get_num_cores()); #endif break; From 397108fba299c87ce17957452d57469af914f516 Mon Sep 17 00:00:00 2001 From: Guillaume Horel Date: Fri, 31 Mar 2023 09:22:40 -0400 Subject: [PATCH 02/14] serialize shared prerequisites --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 144b3400d..3c4b8948a 100644 --- a/Makefile +++ b/Makefile @@ -40,9 +40,9 @@ LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS)) SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test .PHONY : all libs netlib $(RELA) test ctest shared install -.NOTPARALLEL : all libs $(RELA) prof lapack-test install blas-test +.NOTPARALLEL : shared -all :: libs netlib $(RELA) tests shared +all :: tests @echo @echo " OpenBLAS build complete. 
($(LIB_COMPONENTS))" @echo @@ -150,7 +150,7 @@ ifeq ($(OSNAME), CYGWIN_NT) endif endif -tests : libs netlib $(RELA) shared +tests : shared ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) touch $(LIBNAME) ifndef NO_FBLAS From 3effdc15053a53acc36c421ee8df78cf78879fa9 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 7 Apr 2023 19:32:22 +0200 Subject: [PATCH 03/14] Protect CROSS_PATH against spurious addition of linebreaks from isolated dashes fix for #3989 --- c_check | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/c_check b/c_check index e8f90e18a..7173968e5 100755 --- a/c_check +++ b/c_check @@ -35,9 +35,12 @@ if [ "`dirname \"$compiler_name\"`" != '.' ]; then cross_suffix="$cross_suffix`dirname \"$compiler_name\"`/" fi -bn=`basename $compiler_name` +bn=`basename \"$compiler_name\"` + case "$bn" in - *-*) cross_suffix="$cross_suffix${bn%-*}-" + *-*) if [ "$bn" != '-']; then + cross_suffix="$cross_suffix${bn%-*}-" + fi esac compiler="" From fd20a2e8c6c4aa2f47cd1e7019fd9b51176d393e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 10 Apr 2023 22:28:00 +0200 Subject: [PATCH 04/14] Convert CMAKE booleans to 0/1 values for gensymbol --- CMakeLists.txt | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 35fd830ee..8ecd95a95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -398,15 +398,45 @@ if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "") message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}") endif() + if (${BUILD_LAPACK_DEPRECATED}) + set (BLD 1) + else () + set (BLD 0) + endif() + if (${BUILD_BFLOAT16}) + set (BBF16 1) + else () + set (BBF16 0) + endif() + if (${BUILD_SINGLE}) + set (BS 1) + else () + set (BS 0) + endif() + if (${BUILD_DOUBLE}) + set (BD 1) + else () + set (BD 0) + endif() + if (${BUILD_COMPLEX}) + set (BC 1) + else () + set (BC 0) + endif() + if 
(${BUILD_COMPLEX16}) + set (BZ 1) + else () + set (BZ 0) + endif() if (NOT USE_PERL) add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD - COMMAND ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" "${BUILD_BFLOAT16}" "${BUILD_SINGLE}" "${BUILD_DOUBLE}" "${BUILD_COMPLEX}" "${BUILD_COMPLEX16}" > ${PROJECT_BINARY_DIR}/objcopy.def + COMMAND ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so COMMENT "renaming symbols" ) else() add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD - COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" "${BUILD_BFLOAT16}" "${BUILD_SINGLE}" "${BUILD_DOUBLE}" "${BUILD_COMPLEX}" "${BUILD_COMPLEX16}" > ${PROJECT_BINARY_DIR}/objcopy.def + COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so COMMENT "renaming symbols" ) From 
d5fbec7c20e2bcc6c088cf62029ff5e9a879fcd9 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 10 Apr 2023 23:49:35 +0200 Subject: [PATCH 05/14] Export ?MIN/?MAX, ?AMIN/?AMAX, CDOT/ZDOT and ?GEMMT --- exports/gensymbol | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/exports/gensymbol b/exports/gensymbol index 5823c0b3b..7ce85fd7d 100755 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -17,11 +17,11 @@ # removed blas_thread_shutdown_ # blasobjsc=" - caxpy caxpby ccopy cdotc cdotu cgbmv cgemm cgemv cgerc cgeru + camin camax caxpy caxpby ccopy cdot cdotc cdotu cgbmv cgemm cgemv cgerc cgeru chbmv chemm chemv cher2 cher2k cher cherk scabs1 scamax chpmv chpr2 chpr crotg cscal csrot csscal cswap scamin scasum scnrm2 csymm csyr2k csyrk ctbmv ctbsv ctpmv ctpsv ctrmm ctrmv ctrsm - ctrsv icamax icamin cimatcopy comatcopy cgeadd scsum" + ctrsv icamax icamin cimatcopy comatcopy cgeadd scsum cgemmt" blasobjsd=" damax damin dasum daxpy daxpby dcabs1 dcopy ddot dgbmv dgemm @@ -29,7 +29,7 @@ blasobjsd=" dscal dsdot dspmv dspr2 dimatcopy domatcopy dspr dswap dsymm dsymv dsyr2 dsyr2k dsyr dsyrk dtbmv dtbsv dtpmv dtpsv dtrmm dtrmv dtrsm dtrsv - idamax idamin idmax idmin dgeadd dsum" + idamax idamin idmax idmin dgeadd dsum dgemmt" blasobjss=" isamax isamin ismax ismin @@ -38,58 +38,58 @@ blasobjss=" smax smin snrm2 simatcopy somatcopy srot srotg srotm srotmg ssbmv sscal sspmv sspr2 sspr sswap ssymm ssymv ssyr2 ssyr2k ssyr ssyrk stbmv stbsv stpmv stpsv - strmm strmv strsm strsv sgeadd ssum" + strmm strmv strsm strsv sgeadd ssum sgemmt" blasobjsz=" - izamax izamin - zaxpy zaxpby zcopy zdotc zdotu zdrot + zamin zamax izamax izamin + zaxpy zaxpby zcopy zdot zdotc zdotu zdrot zdscal zgbmv zgemm zgemv zgerc zgeru zhbmv zhemm zhemv zher2 zher2k zher zherk zhpmv zhpr2 zhpr zrotg zscal zswap zsymm zsyr2k zsyrk ztbmv ztbsv ztpmv ztpsv ztrmm ztrmv ztrsm ztrsv zomatcopy zimatcopy dzamax dzamin dzasum dznrm2 - zgeadd dzsum" + zgeadd dzsum 
zgemmt" blasobjs="lsame xerbla" bfblasobjs="sbgemm sbgemv sbdot sbstobf16 sbdtobf16 sbf16tos dbf16tod" cblasobjsc=" - cblas_caxpy cblas_ccopy cblas_cdotc cblas_cdotu cblas_cgbmv cblas_cgemm cblas_cgemv + cblas_caxpy cblas_ccopy cblas_cdot cblas_cdotc cblas_cdotu cblas_cgbmv cblas_cgemm cblas_cgemv cblas_cgerc cblas_cgeru cblas_chbmv cblas_chemm cblas_chemv cblas_cher2 cblas_cher2k cblas_cher cblas_cherk cblas_chpmv cblas_chpr2 cblas_chpr cblas_cscal cblas_caxpby cblas_csscal cblas_cswap cblas_csymm cblas_csyr2k cblas_csyrk cblas_ctbmv cblas_cgeadd cblas_ctbsv cblas_ctpmv cblas_ctpsv cblas_ctrmm cblas_ctrmv cblas_ctrsm cblas_ctrsv - cblas_scnrm2 cblas_scasum + cblas_scnrm2 cblas_scasum cblas_camin cblas_camax cblas_cgemmt cblas_cmin cblas_cmax cblas_icamax cblas_icamin cblas_icmin cblas_icmax cblas_scsum cblas_cimatcopy cblas_comatcopy " cblasobjsd=" - cblas_dasum cblas_daxpy cblas_dcopy cblas_ddot - cblas_dgbmv cblas_dgemm cblas_dgemv cblas_dger cblas_dnrm2 + cblas_dasum cblas_daxpy cblas_dcopy cblas_ddot cblas_damin cblas_damax + cblas_dgbmv cblas_dgemm cblas_dgemv cblas_dger cblas_dnrm2 cblas_dmin cblas_dmax cblas_drot cblas_drotg cblas_drotm cblas_drotmg cblas_dsbmv cblas_dscal cblas_dsdot cblas_dspmv cblas_dspr2 cblas_dspr cblas_dswap cblas_dsymm cblas_dsymv cblas_dsyr2 cblas_dsyr2k cblas_dsyr cblas_dsyrk cblas_dtbmv cblas_dtbsv cblas_dtpmv cblas_dtpsv - cblas_dtrmm cblas_dtrmv cblas_dtrsm cblas_dtrsv cblas_daxpby cblas_dgeadd + cblas_dtrmm cblas_dtrmv cblas_dtrsm cblas_dtrsv cblas_daxpby cblas_dgeadd cblas_dgemmt cblas_idamax cblas_idamin cblas_idmin cblas_idmax cblas_dsum cblas_dimatcopy cblas_domatcopy " cblasobjss=" - cblas_sasum cblas_saxpy cblas_saxpby + cblas_sasum cblas_saxpy cblas_saxpby cblas_samin cblas_samax cblas_smax cblas_smin cblas_scopy cblas_sdot cblas_sdsdot cblas_sgbmv cblas_sgemm cblas_sgemv cblas_sger cblas_snrm2 cblas_srot cblas_srotg cblas_srotm cblas_srotmg cblas_ssbmv cblas_sscal cblas_sspmv cblas_sspr2 cblas_sspr cblas_sswap 
cblas_ssymm cblas_ssymv cblas_ssyr2 cblas_ssyr2k cblas_ssyr cblas_ssyrk cblas_stbmv cblas_stbsv cblas_stpmv cblas_stpsv cblas_strmm cblas_strmv cblas_strsm - cblas_strsv cblas_sgeadd + cblas_strsv cblas_sgeadd cblas_sgemmt cblas_isamax cblas_isamin cblas_ismin cblas_ismax cblas_ssum cblas_simatcopy cblas_somatcopy " cblasobjsz=" - cblas_dzasum cblas_dznrm2 cblas_zaxpy cblas_zcopy cblas_zdotc cblas_zdotu cblas_zdscal + cblas_dzasum cblas_dznrm2 cblas_zaxpy cblas_zcopy cblas_zdot cblas_zdotc cblas_zdotu cblas_zdscal cblas_zgbmv cblas_zgemm cblas_zgemv cblas_zgerc cblas_zgeru cblas_zhbmv cblas_zhemm cblas_zhemv cblas_zher2 cblas_zher2k cblas_zher cblas_zherk cblas_zhpmv cblas_zhpr2 cblas_zhpr cblas_zscal cblas_zswap cblas_zsymm cblas_zsyr2k cblas_zsyrk cblas_ztbmv cblas_ztbsv cblas_ztpmv cblas_ztpsv cblas_ztrmm cblas_ztrmv cblas_ztrsm cblas_ztrsv cblas_cdotc_sub cblas_cdotu_sub cblas_zdotc_sub cblas_zdotu_sub - cblas_zaxpby cblas_zgeadd + cblas_zaxpby cblas_zgeadd cblas_zamin cblas_zamax cblas_zgemmt cblas_zmin cblas_zmax cblas_izamax cblas_izamin cblas_izmin cblas_izmax cblas_dzsum cblas_zimatcopy cblas_zomatcopy " From caa2945138f3c8a6f3f0dacbaf653c283e3cd2cb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 11 Apr 2023 00:04:09 +0200 Subject: [PATCH 06/14] Support Apple A15/M2 cpus through the existing VORTEX target --- cpuid_arm64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpuid_arm64.c b/cpuid_arm64.c index 1080ea974..809f48e95 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -268,7 +268,8 @@ int detect(void) #else #ifdef __APPLE__ sysctlbyname("hw.cpufamily",&value,&length,NULL,0); - if (value ==131287967|| value == 458787763 ) return CPU_VORTEX; + if (value ==131287967|| value == 458787763 ) return CPU_VORTEX; //A12/M1 + if (value == 3660830781) return CPU_VORTEX; //A15/M2 #endif return CPU_ARMV8; #endif From 57bdc36c846cd44396e1d39a5f7a191bda363503 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 11 Apr 2023 22:38:38 
+0200 Subject: [PATCH 07/14] add conditionals for BUILD_LAPACK_DEPRECATED --- cmake/lapack.cmake | 4 ++++ cmake/lapacke.cmake | 30 ++++++------------------------ 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index 45dda8686..544e226ab 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -436,6 +436,7 @@ if(USE_XBLAS) set(ALLXOBJ ${SXLASRC} ${DXLASRC} ${CXLASRC} ${ZXLASRC}) endif() +if(BUILD_LAPACK_DEPRECATED) list(APPEND SLASRC DEPRECATED/sgegs.f DEPRECATED/sgegv.f DEPRECATED/sgeqpf.f DEPRECATED/sgelsx.f DEPRECATED/sggsvd.f DEPRECATED/sggsvp.f DEPRECATED/slahrd.f DEPRECATED/slatzm.f DEPRECATED/stzrqf.f) @@ -449,6 +450,7 @@ list(APPEND ZLASRC DEPRECATED/zgegs.f DEPRECATED/zgegv.f DEPRECATED/zgeqpf.f DEPRECATED/zgelsx.f DEPRECATED/zggsvd.f DEPRECATED/zggsvp.f DEPRECATED/zlahrd.f DEPRECATED/zlatzm.f DEPRECATED/ztzrqf.f) message(STATUS "Building deprecated routines") +endif() set(DSLASRC spotrs.f) @@ -930,6 +932,7 @@ if(USE_XBLAS) set(ALLXOBJ ${SXLASRC} ${DXLASRC} ${CXLASRC} ${ZXLASRC}) endif() +if(BUILD_LAPACK_DEPRECATED) list(APPEND SLASRC DEPRECATED/sgegs.c DEPRECATED/sgegv.c DEPRECATED/sgeqpf.c DEPRECATED/sgelsx.c DEPRECATED/sggsvd.c DEPRECATED/sggsvp.c DEPRECATED/slahrd.c DEPRECATED/slatzm.c DEPRECATED/stzrqf.c) @@ -943,6 +946,7 @@ list(APPEND ZLASRC DEPRECATED/zgegs.c DEPRECATED/zgegv.c DEPRECATED/zgeqpf.c DEPRECATED/zgelsx.c DEPRECATED/zggsvd.c DEPRECATED/zggsvp.c DEPRECATED/zlahrd.c DEPRECATED/zlatzm.c DEPRECATED/ztzrqf.c) message(STATUS "Building deprecated routines") +endif() set(DSLASRC spotrs.c) diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake index 3a9352197..be6a286fe 100644 --- a/cmake/lapacke.cmake +++ b/cmake/lapacke.cmake @@ -70,8 +70,6 @@ set(CSRC lapacke_cgeqlf_work.c lapacke_cgeqp3.c lapacke_cgeqp3_work.c - lapacke_cgeqpf.c - lapacke_cgeqpf_work.c lapacke_cgeqr.c lapacke_cgeqr_work.c lapacke_cgeqr2.c @@ -144,12 +142,8 @@ set(CSRC lapacke_cggqrf_work.c lapacke_cggrqf.c 
lapacke_cggrqf_work.c - lapacke_cggsvd.c - lapacke_cggsvd_work.c lapacke_cggsvd3.c lapacke_cggsvd3_work.c - lapacke_cggsvp.c - lapacke_cggsvp_work.c lapacke_cggsvp3.c lapacke_cggsvp3_work.c lapacke_cgtcon.c @@ -695,8 +689,6 @@ set(DSRC lapacke_dgeqlf_work.c lapacke_dgeqp3.c lapacke_dgeqp3_work.c - lapacke_dgeqpf.c - lapacke_dgeqpf_work.c lapacke_dgeqr.c lapacke_dgeqr_work.c lapacke_dgeqr2.c @@ -771,12 +763,8 @@ set(DSRC lapacke_dggqrf_work.c lapacke_dggrqf.c lapacke_dggrqf_work.c - lapacke_dggsvd.c - lapacke_dggsvd_work.c lapacke_dggsvd3.c lapacke_dggsvd3_work.c - lapacke_dggsvp.c - lapacke_dggsvp_work.c lapacke_dggsvp3.c lapacke_dggsvp3_work.c lapacke_dgtcon.c @@ -1275,8 +1263,6 @@ set(SSRC lapacke_sgeqlf_work.c lapacke_sgeqp3.c lapacke_sgeqp3_work.c - lapacke_sgeqpf.c - lapacke_sgeqpf_work.c lapacke_sgeqr.c lapacke_sgeqr_work.c lapacke_sgeqr2.c @@ -1351,12 +1337,8 @@ set(SSRC lapacke_sggqrf_work.c lapacke_sggrqf.c lapacke_sggrqf_work.c - lapacke_sggsvd.c - lapacke_sggsvd_work.c lapacke_sggsvd3.c lapacke_sggsvd3_work.c - lapacke_sggsvp.c - lapacke_sggsvp_work.c lapacke_sggsvp3.c lapacke_sggsvp3_work.c lapacke_sgtcon.c @@ -1849,8 +1831,6 @@ set(ZSRC lapacke_zgeqlf_work.c lapacke_zgeqp3.c lapacke_zgeqp3_work.c - lapacke_zgeqpf.c - lapacke_zgeqpf_work.c lapacke_zgeqr.c lapacke_zgeqr_work.c lapacke_zgeqr2.c @@ -1925,12 +1905,8 @@ set(ZSRC lapacke_zggqrf_work.c lapacke_zggrqf.c lapacke_zggrqf_work.c - lapacke_zggsvd.c - lapacke_zggsvd_work.c lapacke_zggsvd3.c lapacke_zggsvd3_work.c - lapacke_zggsvp.c - lapacke_zggsvp_work.c lapacke_zggsvp3.c lapacke_zggsvp3_work.c lapacke_zgtcon.c @@ -2401,6 +2377,12 @@ set(ZSRC lapacke_csyr_work.c lapacke_ilaver.c ) +if (BUILD_LAPACK_DEPRECATED) +list(APPEND SSRC lapacke_sgeqpf.c lapacke_sgeqpf_work.c lapacke_sggsvd.c lapacke_sggsvd_work.c lapacke_sggsvp.c lapacke_sggsvp_work.c) +list(APPEND DSRC lapacke_dgeqpf.c lapacke_dgeqpf_work.c lapacke_dggsvd.c lapacke_dggsvd_work.c lapacke_dggsvp.c lapacke_dggsvp_work.c) +list(APPEND CSRC
lapacke_cgeqpf.c lapacke_cgeqpf_work.c lapacke_cggsvd.c lapacke_cggsvd_work.c lapacke_cggsvp.c lapacke_cggsvp_work.c) +list(APPEND ZSRC lapacke_zgeqpf.c lapacke_zgeqpf_work.c lapacke_zggsvd.c lapacke_zggsvd_work.c lapacke_zggsvp.c lapacke_zggsvp_work.c) +endif() set(SRCX lapacke_cgbrfsx.c lapacke_cporfsx.c lapacke_dgerfsx.c lapacke_sgbrfsx.c lapacke_ssyrfsx.c lapacke_zherfsx.c From cd8eb33a9c989b479367c2bdd33d7c843c27e3fb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 11 Apr 2023 22:39:53 +0200 Subject: [PATCH 08/14] Expose BUILD_LAPACK_DEPRECATED --- CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ecd95a95..d59290c90 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,8 @@ include(CMakePackageConfigHelpers) ####### option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF) +option(BUILD_LAPACK_DEPRECATED "When building LAPACK, include also some older, deprecated routines" ON) + option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON) option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF) @@ -398,12 +400,12 @@ if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "") message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}") endif() - if (${BUILD_LAPACK_DEPRECATED}) + if (${BUILD_LAPACK_DEPRECATED}) set (BLD 1) else () set (BLD 0) endif() - if (${BUILD_BFLOAT16}) + if (${BUILD_BFLOAT16}) set (BBF16 1) else () set (BBF16 0) From 6c45c980835cc0fc0d6fc8751349af54bdaa8426 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 11 Apr 2023 22:41:18 +0200 Subject: [PATCH 09/14] Add (only) the GEMMT functions --- exports/gensymbol | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/exports/gensymbol b/exports/gensymbol index 7ce85fd7d..b584167a4 100755 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -17,7
+17,7 @@ # removed blas_thread_shutdown_ # blasobjsc=" - camin camax caxpy caxpby ccopy cdot cdotc cdotu cgbmv cgemm cgemv cgerc cgeru + caxpy caxpby ccopy cdotc cdotu cgbmv cgemm cgemv cgerc cgeru chbmv chemm chemv cher2 cher2k cher cherk scabs1 scamax chpmv chpr2 chpr crotg cscal csrot csscal cswap scamin scasum scnrm2 csymm csyr2k csyrk ctbmv ctbsv ctpmv ctpsv ctrmm ctrmv ctrsm @@ -41,8 +41,8 @@ blasobjss=" strmm strmv strsm strsv sgeadd ssum sgemmt" blasobjsz=" - zamin zamax izamax izamin - zaxpy zaxpby zcopy zdot zdotc zdotu zdrot + izamax izamin + zaxpy zaxpby zcopy zdotc zdotu zdrot zdscal zgbmv zgemm zgemv zgerc zgeru zhbmv zhemm zhemv zher2 zher2k zher zherk zhpmv zhpr2 zhpr zrotg zscal zswap zsymm zsyr2k zsyrk ztbmv @@ -53,17 +53,17 @@ blasobjsz=" blasobjs="lsame xerbla" bfblasobjs="sbgemm sbgemv sbdot sbstobf16 sbdtobf16 sbf16tos dbf16tod" cblasobjsc=" - cblas_caxpy cblas_ccopy cblas_cdot cblas_cdotc cblas_cdotu cblas_cgbmv cblas_cgemm cblas_cgemv + cblas_caxpy cblas_ccopy cblas_cdotc cblas_cdotu cblas_cgbmv cblas_cgemm cblas_cgemv cblas_cgerc cblas_cgeru cblas_chbmv cblas_chemm cblas_chemv cblas_cher2 cblas_cher2k cblas_cher cblas_cherk cblas_chpmv cblas_chpr2 cblas_chpr cblas_cscal cblas_caxpby cblas_csscal cblas_cswap cblas_csymm cblas_csyr2k cblas_csyrk cblas_ctbmv cblas_cgeadd cblas_ctbsv cblas_ctpmv cblas_ctpsv cblas_ctrmm cblas_ctrmv cblas_ctrsm cblas_ctrsv - cblas_scnrm2 cblas_scasum cblas_camin cblas_camax cblas_cgemmt cblas_cmin cblas_cmax + cblas_scnrm2 cblas_scasum cblas_cgemmt cblas_icamax cblas_icamin cblas_icmin cblas_icmax cblas_scsum cblas_cimatcopy cblas_comatcopy " cblasobjsd=" - cblas_dasum cblas_daxpy cblas_dcopy cblas_ddot cblas_damin cblas_damax - cblas_dgbmv cblas_dgemm cblas_dgemv cblas_dger cblas_dnrm2 cblas_dmin cblas_dmax + cblas_dasum cblas_daxpy cblas_dcopy cblas_ddot + cblas_dgbmv cblas_dgemm cblas_dgemv cblas_dger cblas_dnrm2 cblas_drot cblas_drotg cblas_drotm cblas_drotmg cblas_dsbmv cblas_dscal cblas_dsdot cblas_dspmv 
cblas_dspr2 cblas_dspr cblas_dswap cblas_dsymm cblas_dsymv cblas_dsyr2 cblas_dsyr2k cblas_dsyr cblas_dsyrk cblas_dtbmv cblas_dtbsv cblas_dtpmv cblas_dtpsv @@ -72,24 +72,24 @@ cblasobjsd=" " cblasobjss=" - cblas_sasum cblas_saxpy cblas_saxpby cblas_samin cblas_samax cblas_smax cblas_smin + cblas_sasum cblas_saxpy cblas_saxpby cblas_scopy cblas_sdot cblas_sdsdot cblas_sgbmv cblas_sgemm cblas_sgemv cblas_sger cblas_snrm2 cblas_srot cblas_srotg cblas_srotm cblas_srotmg cblas_ssbmv cblas_sscal cblas_sspmv cblas_sspr2 cblas_sspr cblas_sswap cblas_ssymm cblas_ssymv cblas_ssyr2 cblas_ssyr2k cblas_ssyr cblas_ssyrk cblas_stbmv cblas_stbsv cblas_stpmv cblas_stpsv cblas_strmm cblas_strmv cblas_strsm - cblas_strsv cblas_sgeadd cblas_sgemmt + cblas_strsv cblas_sgeadd cblas_sgemmt cblas_isamax cblas_isamin cblas_ismin cblas_ismax cblas_ssum cblas_simatcopy cblas_somatcopy " cblasobjsz=" - cblas_dzasum cblas_dznrm2 cblas_zaxpy cblas_zcopy cblas_zdot cblas_zdotc cblas_zdotu cblas_zdscal + cblas_dzasum cblas_dznrm2 cblas_zaxpy cblas_zcopy cblas_zdotc cblas_zdotu cblas_zdscal cblas_zgbmv cblas_zgemm cblas_zgemv cblas_zgerc cblas_zgeru cblas_zhbmv cblas_zhemm cblas_zhemv cblas_zher2 cblas_zher2k cblas_zher cblas_zherk cblas_zhpmv cblas_zhpr2 cblas_zhpr cblas_zscal cblas_zswap cblas_zsymm cblas_zsyr2k cblas_zsyrk cblas_ztbmv cblas_ztbsv cblas_ztpmv cblas_ztpsv cblas_ztrmm cblas_ztrmv cblas_ztrsm cblas_ztrsv cblas_cdotc_sub cblas_cdotu_sub cblas_zdotc_sub cblas_zdotu_sub - cblas_zaxpby cblas_zgeadd cblas_zamin cblas_zamax cblas_zgemmt cblas_zmin cblas_zmax + cblas_zaxpby cblas_zgeadd cblas_zgemmt cblas_izamax cblas_izamin cblas_izmin cblas_izmax cblas_dzsum cblas_zimatcopy cblas_zomatcopy " From 2ea00788c271c7a5727b13f7a90433ce21639042 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 11 Apr 2023 22:46:51 +0200 Subject: [PATCH 10/14] Add ?GEMMT --- exports/gensymbol.pl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/exports/gensymbol.pl 
b/exports/gensymbol.pl index e38a3cc89..dd79e924d 100644 --- a/exports/gensymbol.pl +++ b/exports/gensymbol.pl @@ -21,7 +21,7 @@ chbmv,chemm,chemv,cher2,cher2k,cher,cherk,scabs1,scamax, chpmv,chpr2,chpr,crotg,cscal,csrot,csscal,cswap,scamin,scasum,scnrm2, csymm,csyr2k,csyrk,ctbmv,ctbsv,ctpmv,ctpsv,ctrmm,ctrmv,ctrsm, - ctrsv,icamax,icamin,cimatcopy,comatcopy,cgeadd,scsum); + ctrsv,icamax,icamin,cimatcopy,comatcopy,cgeadd,scsum,cgemmt); @blasobjsd = ( damax,damin,dasum,daxpy,daxpby,dcabs1,dcopy,ddot,dgbmv,dgemm, @@ -29,7 +29,7 @@ dscal,dsdot,dspmv,dspr2,dimatcopy,domatcopy, dspr,dswap,dsymm,dsymv,dsyr2,dsyr2k,dsyr,dsyrk,dtbmv,dtbsv, dtpmv,dtpsv,dtrmm,dtrmv,dtrsm,dtrsv, - idamax,idamin,idmax,idmin,dgeadd,dsum); + idamax,idamin,idmax,idmin,dgeadd,dsum,dgemmt); @blasobjss = ( isamax,isamin,ismax,ismin, @@ -38,7 +38,7 @@ smax,smin,snrm2,simatcopy,somatcopy, srot,srotg,srotm,srotmg,ssbmv,sscal,sspmv,sspr2,sspr,sswap, ssymm,ssymv,ssyr2,ssyr2k,ssyr,ssyrk,stbmv,stbsv,stpmv,stpsv, - strmm,strmv,strsm,strsv, sgeadd,ssum); + strmm,strmv,strsm,strsv, sgeadd,ssum,sgemmt); @blasobjsz = ( izamax,izamin,, @@ -48,7 +48,7 @@ zhpr,zrotg,zscal,zswap,zsymm,zsyr2k,zsyrk,ztbmv, ztbsv,ztpmv,ztpsv,ztrmm,ztrmv,ztrsm,ztrsv, zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2, - zgeadd, dzsum); + zgeadd, dzsum, zgemmt); @blasobjs = (lsame, xerbla); @bfblasobjs = (sbgemm, sbgemv, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod); @@ -60,7 +60,7 @@ cblas_ctbsv, cblas_ctpmv, cblas_ctpsv, cblas_ctrmm, cblas_ctrmv, cblas_ctrsm, cblas_ctrsv, cblas_scnrm2, cblas_scasum, cblas_icamax, cblas_icamin, cblas_icmin, cblas_icmax, cblas_scsum,cblas_cimatcopy,cblas_comatcopy - ); + ,cblas_cgemmt); @cblasobjsd = ( cblas_dasum, cblas_daxpy, cblas_dcopy, cblas_ddot, cblas_dgbmv, cblas_dgemm, cblas_dgemv, cblas_dger, cblas_dnrm2, @@ -69,7 +69,7 @@ cblas_dsyr2k, cblas_dsyr, cblas_dsyrk, cblas_dtbmv, cblas_dtbsv, cblas_dtpmv, cblas_dtpsv, cblas_dtrmm, cblas_dtrmv, cblas_dtrsm, cblas_dtrsv, cblas_daxpby, cblas_dgeadd,
cblas_idamax, cblas_idamin, cblas_idmin, cblas_idmax, cblas_dsum,cblas_dimatcopy,cblas_domatcopy - ); + ,cblas_dgemmt); @cblasobjss = ( cblas_sasum, cblas_saxpy, cblas_saxpby, @@ -80,7 +80,7 @@ cblas_stbmv, cblas_stbsv, cblas_stpmv, cblas_stpsv, cblas_strmm, cblas_strmv, cblas_strsm, cblas_strsv, cblas_sgeadd, cblas_isamax, cblas_isamin, cblas_ismin, cblas_ismax, cblas_ssum,cblas_simatcopy,cblas_somatcopy - ); + ,cblas_sgemmt); @cblasobjsz = ( cblas_dzasum, cblas_dznrm2, cblas_zaxpy, cblas_zcopy, cblas_zdotc, cblas_zdotu, cblas_zdscal, cblas_zgbmv, cblas_zgemm, cblas_zgemv, cblas_zgerc, cblas_zgeru, cblas_zhbmv, cblas_zhemm, @@ -90,7 +90,7 @@ cblas_ztrsv, cblas_cdotc_sub, cblas_cdotu_sub, cblas_zdotc_sub, cblas_zdotu_sub, cblas_zaxpby, cblas_zgeadd, cblas_izamax, cblas_izamin, cblas_izmin, cblas_izmax, cblas_dzsum,cblas_zimatcopy,cblas_zomatcopy -); + ,cblas_zgemmt); @cblasobjs = ( cblas_xerbla ); From ac650225c1cba69b3aba0d52031af838e3b6d1dc Mon Sep 17 00:00:00 2001 From: Honglin Zhu Date: Thu, 13 Apr 2023 00:08:27 +0800 Subject: [PATCH 11/14] Fix x86 detection error --- cpuid_x86.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/cpuid_x86.c b/cpuid_x86.c index ad13a8c8c..69cbba90e 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1936,7 +1936,8 @@ static char *corename[] = { "ZEN", "SKYLAKEX", "DHYANA", - "COOPERLAKE" + "COOPERLAKE", + "SAPPHIRERAPIDS", }; static char *corename_lower[] = { @@ -1970,7 +1971,8 @@ static char *corename_lower[] = { "zen", "skylakex", "dhyana", - "cooperlake" + "cooperlake", + "sapphirerapids", }; @@ -2276,16 +2278,18 @@ int get_coretype(void){ return CORE_NEHALEM; } if (model == 15) { // Sapphire Rapids + if(support_amx_bf16()) + return CORE_SAPPHIRERAPIDS; if(support_avx512_bf16()) - return CPUTYPE_COOPERLAKE; + return CORE_COOPERLAKE; if(support_avx512()) - return CPUTYPE_SKYLAKEX; + return CORE_SKYLAKEX; if(support_avx2()) - return CPUTYPE_HASWELL; + return CORE_HASWELL; if(support_avx()) - return
CPUTYPE_SANDYBRIDGE; + return CORE_SANDYBRIDGE; else - return CPUTYPE_NEHALEM; + return CORE_NEHALEM; } break; From 970e611e007eeb180ce963af89746d9e84f90e8f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 14 Apr 2023 19:42:34 +0200 Subject: [PATCH 12/14] fix missing blank in test --- c_check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_check b/c_check index 7173968e5..9be152b12 100755 --- a/c_check +++ b/c_check @@ -38,7 +38,7 @@ fi bn=`basename \"$compiler_name\"` case "$bn" in - *-*) if [ "$bn" != '-']; then + *-*) if [ "$bn" != '-' ]; then cross_suffix="$cross_suffix${bn%-*}-" fi esac From 38d7a7b562860555a6440953124eb47cf7d5b506 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 16 Apr 2023 00:07:58 +0200 Subject: [PATCH 13/14] Fix ?GEMMT --- interface/gemmt.c | 100 +++++++++++++++++++++++----------------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/interface/gemmt.c b/interface/gemmt.c index 3eed1dfe4..d35406411 100644 --- a/interface/gemmt.c +++ b/interface/gemmt.c @@ -35,29 +35,26 @@ #include #include #include "common.h" -#ifdef FUNCTION_PROFILE -#include "functable.h" -#endif #ifndef COMPLEX #define SMP_THRESHOLD_MIN 65536.0 #ifdef XDOUBLE -#define ERROR_NAME "QGEMT " +#define ERROR_NAME "QGEMMT " #elif defined(DOUBLE) -#define ERROR_NAME "DGEMT " +#define ERROR_NAME "DGEMMT " #elif defined(BFLOAT16) -#define ERROR_NAME "SBGEMT " +#define ERROR_NAME "SBGEMMT " #else -#define ERROR_NAME "SGEMT " +#define ERROR_NAME "SGEMMT " #endif #else #define SMP_THRESHOLD_MIN 8192.0 #ifdef XDOUBLE -#define ERROR_NAME "XGEMT " +#define ERROR_NAME "XGEMMT " #elif defined(DOUBLE) -#define ERROR_NAME "ZGEMT " +#define ERROR_NAME "ZGEMMT " #else -#define ERROR_NAME "CGEMT " +#define ERROR_NAME "CGEMMT " #endif #endif @@ -68,13 +65,13 @@ #ifndef CBLAS void NAME(char *UPLO, char *TRANSA, char *TRANSB, - blasint * M, blasint * N, blasint * K, + blasint * M, blasint * K, FLOAT * Alpha, IFLOAT * a, blasint * ldA, 
IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC) { - blasint m, n, k; + blasint m, k; blasint lda, ldb, ldc; int transa, transb, uplo; blasint info; @@ -92,7 +89,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, PRINT_DEBUG_NAME; m = *M; - n = *N; k = *K; #if defined(COMPLEX) @@ -167,8 +163,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, info = 13; if (k < 0) info = 5; - if (n < 0) - info = 4; if (m < 0) info = 3; if (transb < 0) @@ -184,7 +178,7 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, - blasint N, blasint k, + blasint k, #ifndef COMPLEX FLOAT alpha, IFLOAT * A, blasint LDA, @@ -205,7 +199,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int transa, transb, uplo; blasint info; - blasint m, n, lda, ldb; + blasint m, lda, ldb; FLOAT *a, *b; XFLOAT *buffer; @@ -248,9 +242,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, transb = 3; #endif - m = M; - n = N; - a = (void *)A; b = (void *)B; lda = LDA; @@ -262,8 +253,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, info = 13; if (k < 0) info = 5; - if (n < 0) - info = 4; if (m < 0) info = 3; if (transb < 0) @@ -273,8 +262,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, } if (order == CblasRowMajor) { - m = N; - n = M; a = (void *)B; b = (void *)A; @@ -319,8 +306,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, info = 13; if (k < 0) info = 5; - if (n < 0) - info = 4; if (m < 0) info = 3; if (transb < 0) @@ -407,37 +392,35 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, #endif - if ((m == 0) || (n == 0)) + if ((m == 0) ) return; IDEBUG_START; - FUNCTION_PROFILE_START(); - const blasint incb = (transb == 0) ? 
1 : ldb; if (uplo == 1) { - for (i = 0; i < n; i++) { - j = n - i; + for (i = 0; i < m; i++) { + j = m - i; l = j; #if defined(COMPLEX) aa = a + i * 2; bb = b + i * ldb * 2; if (transa) { - l = k; aa = a + lda * i * 2; - bb = b + i * 2; } + if (transb) + bb = b + i * 2; cc = c + i * 2 * ldc + i * 2; #else aa = a + i; bb = b + i * ldb; if (transa) { - l = k; aa = a + lda * i; - bb = b + i; } + if (transb) + bb = b + i; cc = c + i * ldc + i; #endif @@ -458,8 +441,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, IDEBUG_START; - FUNCTION_PROFILE_START(); - buffer_size = j + k + 128 / sizeof(FLOAT); #ifdef WINDOWS_ABI buffer_size += 160 / sizeof(FLOAT); @@ -479,20 +460,34 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, #endif #if defined(COMPLEX) + if (!transa) (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, aa, lda, bb, incb, cc, 1, buffer); + else + (gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, + aa, lda, bb, incb, cc, 1, + buffer); #else + if (!transa) (gemv[(int)transa]) (j, k, 0, alpha, aa, lda, bb, incb, cc, 1, buffer); + else + (gemv[(int)transa]) (k, j, 0, alpha, aa, lda, + bb, incb, cc, 1, buffer); #endif #ifdef SMP } else { - + if (!transa) (gemv_thread[(int)transa]) (j, k, alpha, aa, lda, bb, incb, cc, 1, buffer, nthreads); + else + (gemv_thread[(int)transa]) (k, j, alpha, aa, + lda, bb, incb, cc, + 1, buffer, + nthreads); } #endif @@ -501,21 +496,19 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, } } else { - for (i = 0; i < n; i++) { + for (i = 0; i < m; i++) { j = i + 1; l = j; #if defined COMPLEX bb = b + i * ldb * 2; - if (transa) { - l = k; + if (transb) { bb = b + i * 2; } cc = c + i * 2 * ldc; #else bb = b + i * ldb; - if (transa) { - l = k; + if (transb) { bb = b + i; } cc = c + i * ldc; @@ -537,8 +530,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, #endif IDEBUG_START; - FUNCTION_PROFILE_START(); - buffer_size = j + k + 128 / sizeof(FLOAT); #ifdef WINDOWS_ABI buffer_size += 160 / sizeof(FLOAT); @@ 
-558,30 +549,39 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, #endif #if defined(COMPLEX) + if (!transa) (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, a, lda, bb, incb, cc, 1, buffer); + else + (gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, + a, lda, bb, incb, cc, 1, + buffer); #else + if (!transa) (gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb, incb, cc, 1, buffer); + else + (gemv[(int)transa]) (k, j, 0, alpha, a, lda, bb, + incb, cc, 1, buffer); #endif #ifdef SMP } else { - + if (!transa) (gemv_thread[(int)transa]) (j, k, alpha, a, lda, bb, incb, cc, 1, buffer, nthreads); - + else + (gemv_thread[(int)transa]) (k, j, alpha, a, lda, + bb, incb, cc, 1, + buffer, nthreads); } #endif STACK_FREE(buffer); } } - FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, - args.m * args.k + args.k * args.n + - args.m * args.n, 2 * args.m * args.n * args.k); IDEBUG_END; From bfc20c2e97c9695d1f04c6b5ad16d21fb6c1db76 Mon Sep 17 00:00:00 2001 From: Chris Sidebottom Date: Mon, 17 Apr 2023 11:17:42 +0100 Subject: [PATCH 14/14] Add Chris Sidebottom to CONTRIBUTORS.md --- CONTRIBUTORS.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index f5e9dda91..71df13634 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -23,6 +23,9 @@ * Optimization on AMD Piledriver * Optimization on Intel Haswell +* Chris Sidebottom + * Optimizations and other improvements targeting AArch64 + ## Previous Developers * Zaheer Chothia @@ -212,4 +215,4 @@ In chronological order: * [2022-03] Support RISC-V Vector Intrinisc 1.0 version. * Pablo Romero - * [2022-08] Fix building from sources for QNX \ No newline at end of file + * [2022-08] Fix building from sources for QNX