Merge branch 'xianyi:develop' into cirrusjobs

This commit is contained in:
Martin Kroeker 2023-04-17 18:27:42 +02:00 committed by GitHub
commit 0724df404c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 138 additions and 109 deletions

View File

@ -20,6 +20,8 @@ include(CMakePackageConfigHelpers)
#######
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF)
option(BUILD_LAPACK_DEPRECATED "When building LAPACK, include also some older, deprecated routines" ON)
option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON)
option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF)
@ -398,15 +400,45 @@ if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
endif()
# Convert the boolean build options into literal 1/0 flags. The flags are
# passed to exports/gensymbol (or gensymbol.pl) by the add_custom_command()
# calls below instead of the raw option values, which may be spelled
# ON/OFF/TRUE/FALSE/... depending on how they were set.
#
# Options are tested by name - if(<variable>) - rather than expanded with
# ${...}: an expanded value is evaluated by if() a second time, so an unusual
# value could be treated as the name of another variable.
# Each flag starts at 0 and is raised to 1 only when its option is true.

# BLD: include the deprecated LAPACK routines
set(BLD 0)
if(BUILD_LAPACK_DEPRECATED)
  set(BLD 1)
endif()

# BBF16: bfloat16 routines
set(BBF16 0)
if(BUILD_BFLOAT16)
  set(BBF16 1)
endif()

# BS: single precision real routines
set(BS 0)
if(BUILD_SINGLE)
  set(BS 1)
endif()

# BD: double precision real routines
set(BD 0)
if(BUILD_DOUBLE)
  set(BD 1)
endif()

# BC: single precision complex routines
set(BC 0)
if(BUILD_COMPLEX)
  set(BC 1)
endif()

# BZ: double precision complex routines
set(BZ 0)
if(BUILD_COMPLEX16)
  set(BZ 1)
endif()
if (NOT USE_PERL)
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
COMMAND ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" "${BUILD_BFLOAT16}" "${BUILD_SINGLE}" "${BUILD_DOUBLE}" "${BUILD_COMPLEX}" "${BUILD_COMPLEX16}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
COMMENT "renaming symbols"
)
else()
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" "${BUILD_BFLOAT16}" "${BUILD_SINGLE}" "${BUILD_DOUBLE}" "${BUILD_COMPLEX}" "${BUILD_COMPLEX16}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
COMMENT "renaming symbols"
)

View File

@ -23,6 +23,9 @@
* Optimization on AMD Piledriver
* Optimization on Intel Haswell
* Chris Sidebottom <chris.sidebottom@arm.com>
* Optimizations and other improvements targeting AArch64
## Previous Developers
* Zaheer Chothia <zaheer.chothia@gmail.com>

View File

@ -40,9 +40,9 @@ LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS))
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test
.PHONY : all libs netlib $(RELA) test ctest shared install
.NOTPARALLEL : all libs $(RELA) prof lapack-test install blas-test
.NOTPARALLEL : shared
all :: libs netlib $(RELA) tests shared
all :: tests
@echo
@echo " OpenBLAS build complete. ($(LIB_COMPONENTS))"
@echo
@ -150,7 +150,7 @@ ifeq ($(OSNAME), CYGWIN_NT)
endif
endif
tests : libs netlib $(RELA) shared
tests : shared
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
touch $(LIBNAME)
ifndef NO_FBLAS

View File

@ -35,9 +35,12 @@ if [ "`dirname \"$compiler_name\"`" != '.' ]; then
cross_suffix="$cross_suffix`dirname \"$compiler_name\"`/"
fi
bn=`basename $compiler_name`
# Quote the argument plainly: this command substitution is not itself inside
# double quotes, so \" would be handed to basename as literal quote characters
# and end up in $bn (and from there in $cross_suffix).
bn=`basename "$compiler_name"`
case "$bn" in
*-*) cross_suffix="$cross_suffix${bn%-*}-"
*-*) if [ "$bn" != '-' ]; then
cross_suffix="$cross_suffix${bn%-*}-"
fi
esac
compiler=""

View File

@ -436,6 +436,7 @@ if(USE_XBLAS)
set(ALLXOBJ ${SXLASRC} ${DXLASRC} ${CXLASRC} ${ZXLASRC})
endif()
if(BUILD_LAPACK_DEPRECATED)
list(APPEND SLASRC DEPRECATED/sgegs.f DEPRECATED/sgegv.f
DEPRECATED/sgeqpf.f DEPRECATED/sgelsx.f DEPRECATED/sggsvd.f
DEPRECATED/sggsvp.f DEPRECATED/slahrd.f DEPRECATED/slatzm.f DEPRECATED/stzrqf.f)
@ -449,6 +450,7 @@ list(APPEND ZLASRC DEPRECATED/zgegs.f DEPRECATED/zgegv.f
DEPRECATED/zgeqpf.f DEPRECATED/zgelsx.f DEPRECATED/zggsvd.f
DEPRECATED/zggsvp.f DEPRECATED/zlahrd.f DEPRECATED/zlatzm.f DEPRECATED/ztzrqf.f)
message(STATUS "Building deprecated routines")
endif()
set(DSLASRC spotrs.f)
@ -930,6 +932,7 @@ if(USE_XBLAS)
set(ALLXOBJ ${SXLASRC} ${DXLASRC} ${CXLASRC} ${ZXLASRC})
endif()
if(BUILD_LAPACK_DEPRECATED)
list(APPEND SLASRC DEPRECATED/sgegs.c DEPRECATED/sgegv.c
DEPRECATED/sgeqpf.c DEPRECATED/sgelsx.c DEPRECATED/sggsvd.c
DEPRECATED/sggsvp.c DEPRECATED/slahrd.c DEPRECATED/slatzm.c DEPRECATED/stzrqf.c)
@ -943,6 +946,7 @@ list(APPEND ZLASRC DEPRECATED/zgegs.c DEPRECATED/zgegv.c
DEPRECATED/zgeqpf.c DEPRECATED/zgelsx.c DEPRECATED/zggsvd.c
DEPRECATED/zggsvp.c DEPRECATED/zlahrd.c DEPRECATED/zlatzm.c DEPRECATED/ztzrqf.c)
message(STATUS "Building deprecated routines")
endif()
set(DSLASRC spotrs.c)

View File

@ -70,8 +70,6 @@ set(CSRC
lapacke_cgeqlf_work.c
lapacke_cgeqp3.c
lapacke_cgeqp3_work.c
lapacke_cgeqpf.c
lapacke_cgeqpf_work.c
lapacke_cgeqr.c
lapacke_cgeqr_work.c
lapacke_cgeqr2.c
@ -144,12 +142,8 @@ set(CSRC
lapacke_cggqrf_work.c
lapacke_cggrqf.c
lapacke_cggrqf_work.c
lapacke_cggsvd.c
lapacke_cggsvd_work.c
lapacke_cggsvd3.c
lapacke_cggsvd3_work.c
lapacke_cggsvp.c
lapacke_cggsvp_work.c
lapacke_cggsvp3.c
lapacke_cggsvp3_work.c
lapacke_cgtcon.c
@ -695,8 +689,6 @@ set(DSRC
lapacke_dgeqlf_work.c
lapacke_dgeqp3.c
lapacke_dgeqp3_work.c
lapacke_dgeqpf.c
lapacke_dgeqpf_work.c
lapacke_dgeqr.c
lapacke_dgeqr_work.c
lapacke_dgeqr2.c
@ -771,12 +763,8 @@ set(DSRC
lapacke_dggqrf_work.c
lapacke_dggrqf.c
lapacke_dggrqf_work.c
lapacke_dggsvd.c
lapacke_dggsvd_work.c
lapacke_dggsvd3.c
lapacke_dggsvd3_work.c
lapacke_dggsvp.c
lapacke_dggsvp_work.c
lapacke_dggsvp3.c
lapacke_dggsvp3_work.c
lapacke_dgtcon.c
@ -1275,8 +1263,6 @@ set(SSRC
lapacke_sgeqlf_work.c
lapacke_sgeqp3.c
lapacke_sgeqp3_work.c
lapacke_sgeqpf.c
lapacke_sgeqpf_work.c
lapacke_sgeqr.c
lapacke_sgeqr_work.c
lapacke_sgeqr2.c
@ -1351,12 +1337,8 @@ set(SSRC
lapacke_sggqrf_work.c
lapacke_sggrqf.c
lapacke_sggrqf_work.c
lapacke_sggsvd.c
lapacke_sggsvd_work.c
lapacke_sggsvd3.c
lapacke_sggsvd3_work.c
lapacke_sggsvp.c
lapacke_sggsvp_work.c
lapacke_sggsvp3.c
lapacke_sggsvp3_work.c
lapacke_sgtcon.c
@ -1849,8 +1831,6 @@ set(ZSRC
lapacke_zgeqlf_work.c
lapacke_zgeqp3.c
lapacke_zgeqp3_work.c
lapacke_zgeqpf.c
lapacke_zgeqpf_work.c
lapacke_zgeqr.c
lapacke_zgeqr_work.c
lapacke_zgeqr2.c
@ -1925,12 +1905,8 @@ set(ZSRC
lapacke_zggqrf_work.c
lapacke_zggrqf.c
lapacke_zggrqf_work.c
lapacke_zggsvd.c
lapacke_zggsvd_work.c
lapacke_zggsvd3.c
lapacke_zggsvd3_work.c
lapacke_zggsvp.c
lapacke_zggsvp_work.c
lapacke_zggsvp3.c
lapacke_zggsvp3_work.c
lapacke_zgtcon.c
@ -2401,6 +2377,12 @@ set(ZSRC
lapacke_csyr_work.c
lapacke_ilaver.c
)
# The deprecated LAPACKE wrappers are only compiled on request. They are
# appended to the per-precision source lists defined above
# (SSRC, DSRC, CSRC, ZSRC).
# list(APPEND) is used because "$SRCS" is not a CMake variable reference
# (that is spelled "${...}") and would be stored as a literal list entry.
if (BUILD_LAPACK_DEPRECATED)
  list(APPEND SSRC
       lapacke_sgeqpf.c lapacke_sgeqpf_work.c
       lapacke_sggsvd.c lapacke_sggsvd_work.c
       lapacke_sggsvp.c lapacke_sggsvp_work.c)
  list(APPEND DSRC
       lapacke_dgeqpf.c lapacke_dgeqpf_work.c
       lapacke_dggsvd.c lapacke_dggsvd_work.c
       lapacke_dggsvp.c lapacke_dggsvp_work.c)
  list(APPEND CSRC
       lapacke_cgeqpf.c lapacke_cgeqpf_work.c
       lapacke_cggsvd.c lapacke_cggsvd_work.c
       lapacke_cggsvp.c lapacke_cggsvp_work.c)
  list(APPEND ZSRC
       lapacke_zgeqpf.c lapacke_zgeqpf_work.c
       lapacke_zggsvd.c lapacke_zggsvd_work.c
       lapacke_zggsvp.c lapacke_zggsvp_work.c)
endif()
set(SRCX
lapacke_cgbrfsx.c lapacke_cporfsx.c lapacke_dgerfsx.c lapacke_sgbrfsx.c lapacke_ssyrfsx.c lapacke_zherfsx.c

View File

@ -268,7 +268,8 @@ int detect(void)
#else
#ifdef __APPLE__
sysctlbyname("hw.cpufamily",&value,&length,NULL,0);
if (value ==131287967|| value == 458787763 ) return CPU_VORTEX;
if (value ==131287967|| value == 458787763 ) return CPU_VORTEX; //A12/M1
/* 3660830781 exceeds INT32_MAX: compare the low 32 bits as unsigned so the
 * test can still match if value is a signed 32-bit integer.
 * NOTE(review): the declaration of value is not visible here - confirm its type. */
if ((unsigned int)value == 3660830781u) return CPU_VORTEX; //A15/M2
#endif
return CPU_ARMV8;
#endif

View File

@ -1936,7 +1936,8 @@ static char *corename[] = {
"ZEN",
"SKYLAKEX",
"DHYANA",
"COOPERLAKE"
"COOPERLAKE",
"SAPPHIRERAPIDS",
};
static char *corename_lower[] = {
@ -1970,7 +1971,8 @@ static char *corename_lower[] = {
"zen",
"skylakex",
"dhyana",
"cooperlake"
"cooperlake",
"sapphirerapids",
};
@ -2276,16 +2278,18 @@ int get_coretype(void){
return CORE_NEHALEM;
}
if (model == 15) { // Sapphire Rapids
if(support_amx_bf16())
return CORE_SAPPHIRERAPIDS;
if(support_avx512_bf16())
return CPUTYPE_COOPERLAKE;
return CORE_COOPERLAKE;
if(support_avx512())
return CPUTYPE_SKYLAKEX;
return CORE_SKYLAKEX;
if(support_avx2())
return CPUTYPE_HASWELL;
return CORE_HASWELL;
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
return CORE_SANDYBRIDGE;
else
return CPUTYPE_NEHALEM;
return CORE_NEHALEM;
}
break;

View File

@ -21,7 +21,7 @@ blasobjsc="
chbmv chemm chemv cher2 cher2k cher cherk scabs1 scamax
chpmv chpr2 chpr crotg cscal csrot csscal cswap scamin scasum scnrm2
csymm csyr2k csyrk ctbmv ctbsv ctpmv ctpsv ctrmm ctrmv ctrsm
ctrsv icamax icamin cimatcopy comatcopy cgeadd scsum"
ctrsv icamax icamin cimatcopy comatcopy cgeadd scsum cgemmt"
blasobjsd="
damax damin dasum daxpy daxpby dcabs1 dcopy ddot dgbmv dgemm
@ -29,7 +29,7 @@ blasobjsd="
dscal dsdot dspmv dspr2 dimatcopy domatcopy
dspr dswap dsymm dsymv dsyr2 dsyr2k dsyr dsyrk dtbmv dtbsv
dtpmv dtpsv dtrmm dtrmv dtrsm dtrsv
idamax idamin idmax idmin dgeadd dsum"
idamax idamin idmax idmin dgeadd dsum dgemmt"
blasobjss="
isamax isamin ismax ismin
@ -38,7 +38,7 @@ blasobjss="
smax smin snrm2 simatcopy somatcopy
srot srotg srotm srotmg ssbmv sscal sspmv sspr2 sspr sswap
ssymm ssymv ssyr2 ssyr2k ssyr ssyrk stbmv stbsv stpmv stpsv
strmm strmv strsm strsv sgeadd ssum"
strmm strmv strsm strsv sgeadd ssum sgemmt"
blasobjsz="
izamax izamin
@ -48,7 +48,7 @@ blasobjsz="
zhpr zrotg zscal zswap zsymm zsyr2k zsyrk ztbmv
ztbsv ztpmv ztpsv ztrmm ztrmv ztrsm ztrsv
zomatcopy zimatcopy dzamax dzamin dzasum dznrm2
zgeadd dzsum"
zgeadd dzsum zgemmt"
blasobjs="lsame xerbla"
bfblasobjs="sbgemm sbgemv sbdot sbstobf16 sbdtobf16 sbf16tos dbf16tod"
@ -58,7 +58,7 @@ cblasobjsc="
cblas_cher cblas_cherk cblas_chpmv cblas_chpr2 cblas_chpr cblas_cscal cblas_caxpby
cblas_csscal cblas_cswap cblas_csymm cblas_csyr2k cblas_csyrk cblas_ctbmv cblas_cgeadd
cblas_ctbsv cblas_ctpmv cblas_ctpsv cblas_ctrmm cblas_ctrmv cblas_ctrsm cblas_ctrsv
cblas_scnrm2 cblas_scasum
cblas_scnrm2 cblas_scasum cblas_cgemmt
cblas_icamax cblas_icamin cblas_icmin cblas_icmax cblas_scsum cblas_cimatcopy cblas_comatcopy
"
cblasobjsd="
@ -67,7 +67,7 @@ cblasobjsd="
cblas_drot cblas_drotg cblas_drotm cblas_drotmg cblas_dsbmv cblas_dscal cblas_dsdot
cblas_dspmv cblas_dspr2 cblas_dspr cblas_dswap cblas_dsymm cblas_dsymv cblas_dsyr2
cblas_dsyr2k cblas_dsyr cblas_dsyrk cblas_dtbmv cblas_dtbsv cblas_dtpmv cblas_dtpsv
cblas_dtrmm cblas_dtrmv cblas_dtrsm cblas_dtrsv cblas_daxpby cblas_dgeadd
cblas_dtrmm cblas_dtrmv cblas_dtrsm cblas_dtrsv cblas_daxpby cblas_dgeadd cblas_dgemmt
cblas_idamax cblas_idamin cblas_idmin cblas_idmax cblas_dsum cblas_dimatcopy cblas_domatcopy
"
@ -78,7 +78,7 @@ cblasobjss="
cblas_srotm cblas_srotmg cblas_ssbmv cblas_sscal cblas_sspmv cblas_sspr2 cblas_sspr
cblas_sswap cblas_ssymm cblas_ssymv cblas_ssyr2 cblas_ssyr2k cblas_ssyr cblas_ssyrk
cblas_stbmv cblas_stbsv cblas_stpmv cblas_stpsv cblas_strmm cblas_strmv cblas_strsm
cblas_strsv cblas_sgeadd
cblas_strsv cblas_sgeadd cblas_sgemmt
cblas_isamax cblas_isamin cblas_ismin cblas_ismax cblas_ssum cblas_simatcopy cblas_somatcopy
"
@ -89,7 +89,7 @@ cblasobjsz="
cblas_zhpr cblas_zscal cblas_zswap cblas_zsymm cblas_zsyr2k cblas_zsyrk
cblas_ztbmv cblas_ztbsv cblas_ztpmv cblas_ztpsv cblas_ztrmm cblas_ztrmv cblas_ztrsm
cblas_ztrsv cblas_cdotc_sub cblas_cdotu_sub cblas_zdotc_sub cblas_zdotu_sub
cblas_zaxpby cblas_zgeadd
cblas_zaxpby cblas_zgeadd cblas_zgemmt
cblas_izamax cblas_izamin cblas_izmin cblas_izmax cblas_dzsum cblas_zimatcopy cblas_zomatcopy
"

View File

@ -21,7 +21,7 @@
chbmv,chemm,chemv,cher2,cher2k,cher,cherk,scabs1,scamax,
chpmv,chpr2,chpr,crotg,cscal,csrot,csscal,cswap,scamin,scasum,scnrm2,
csymm,csyr2k,csyrk,ctbmv,ctbsv,ctpmv,ctpsv,ctrmm,ctrmv,ctrsm,
ctrsv,icamax,icamin,cimatcopy,comatcopy,cgeadd,scsum);
ctrsv,icamax,icamin,cimatcopy,comatcopy,cgeadd,scsum,cgemmt);
@blasobjsd = (
damax,damin,dasum,daxpy,daxpby,dcabs1,dcopy,ddot,dgbmv,dgemm,
@ -29,7 +29,7 @@
dscal,dsdot,dspmv,dspr2,dimatcopy,domatcopy,
dspr,dswap,dsymm,dsymv,dsyr2,dsyr2k,dsyr,dsyrk,dtbmv,dtbsv,
dtpmv,dtpsv,dtrmm,dtrmv,dtrsm,dtrsv,
idamax,idamin,idmax,idmin,dgeadd,dsum);
idamax,idamin,idmax,idmin,dgeadd,dsum,dgemmt);
@blasobjss = (
isamax,isamin,ismax,ismin,
@ -38,7 +38,7 @@
smax,smin,snrm2,simatcopy,somatcopy,
srot,srotg,srotm,srotmg,ssbmv,sscal,sspmv,sspr2,sspr,sswap,
ssymm,ssymv,ssyr2,ssyr2k,ssyr,ssyrk,stbmv,stbsv,stpmv,stpsv,
strmm,strmv,strsm,strsv, sgeadd,ssum);
strmm,strmv,strsm,strsv, sgeadd,ssum,sgemmt);
@blasobjsz = (
izamax,izamin,,
@ -48,7 +48,7 @@
zhpr,zrotg,zscal,zswap,zsymm,zsyr2k,zsyrk,ztbmv,
ztbsv,ztpmv,ztpsv,ztrmm,ztrmv,ztrsm,ztrsv,
zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2,
zgeadd, dzsum);
zgeadd, dzsum, zgemmt);
@blasobjs = (lsame, xerbla);
@bfblasobjs = (sbgemm, sbgemv, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
@ -60,7 +60,7 @@
cblas_ctbsv, cblas_ctpmv, cblas_ctpsv, cblas_ctrmm, cblas_ctrmv, cblas_ctrsm, cblas_ctrsv,
cblas_scnrm2, cblas_scasum,
# keep the trailing comma: further entries may follow before the closing parenthesis
cblas_icamax, cblas_icamin, cblas_icmin, cblas_icmax, cblas_scsum,cblas_cimatcopy,cblas_comatcopy,
);
cblas_cgemmt);
@cblasobjsd = (
cblas_dasum, cblas_daxpy, cblas_dcopy, cblas_ddot,
cblas_dgbmv, cblas_dgemm, cblas_dgemv, cblas_dger, cblas_dnrm2,
@ -69,7 +69,7 @@
cblas_dsyr2k, cblas_dsyr, cblas_dsyrk, cblas_dtbmv, cblas_dtbsv, cblas_dtpmv, cblas_dtpsv,
cblas_dtrmm, cblas_dtrmv, cblas_dtrsm, cblas_dtrsv, cblas_daxpby, cblas_dgeadd,
# keep the trailing comma: further entries may follow before the closing parenthesis
cblas_idamax, cblas_idamin, cblas_idmin, cblas_idmax, cblas_dsum,cblas_dimatcopy,cblas_domatcopy,
);
cblas_dgemmt);
@cblasobjss = (
cblas_sasum, cblas_saxpy, cblas_saxpby,
@ -80,7 +80,7 @@
cblas_stbmv, cblas_stbsv, cblas_stpmv, cblas_stpsv, cblas_strmm, cblas_strmv, cblas_strsm,
cblas_strsv, cblas_sgeadd,
# keep the trailing comma: further entries may follow before the closing parenthesis
cblas_isamax, cblas_isamin, cblas_ismin, cblas_ismax, cblas_ssum,cblas_simatcopy,cblas_somatcopy,
);
cblas_sgemmt);
@cblasobjsz = (
cblas_dzasum, cblas_dznrm2, cblas_zaxpy, cblas_zcopy, cblas_zdotc, cblas_zdotu, cblas_zdscal,
cblas_zgbmv, cblas_zgemm, cblas_zgemv, cblas_zgerc, cblas_zgeru, cblas_zhbmv, cblas_zhemm,
@ -90,7 +90,7 @@
cblas_ztrsv, cblas_cdotc_sub, cblas_cdotu_sub, cblas_zdotc_sub, cblas_zdotu_sub,
cblas_zaxpby, cblas_zgeadd,
# keep the trailing comma: further entries may follow before the closing parenthesis
cblas_izamax, cblas_izamin, cblas_izmin, cblas_izmax, cblas_dzsum,cblas_zimatcopy,cblas_zomatcopy,
);
cblas_zgemmt);
@cblasobjs = ( cblas_xerbla );

View File

@ -1930,15 +1930,15 @@ printf("ELF_VERSION=2\n");
#ifdef MAKE_NB_JOBS
#if MAKE_NB_JOBS > 0
printf("MAKE += -j %d\n", MAKE_NB_JOBS);
printf("MAKEFLAGS += -j %d\n", MAKE_NB_JOBS);
#else
// Emit nothing: make then inherits the -j setting of its parent make,
// or runs with -j1 when there is no parent make
#endif
#elif NO_PARALLEL_MAKE==1
printf("MAKE += -j 1\n");
printf("MAKEFLAGS += -j 1\n");
#else
printf("MAKE += -j %d\n", get_num_cores());
printf("MAKEFLAGS += -j %d\n", get_num_cores());
#endif
break;

View File

@ -35,29 +35,26 @@
#include <stdio.h>
#include <stdlib.h>
#include "common.h"
#ifdef FUNCTION_PROFILE
#include "functable.h"
#endif
#ifndef COMPLEX
#define SMP_THRESHOLD_MIN 65536.0
#ifdef XDOUBLE
#define ERROR_NAME "QGEMT "
#define ERROR_NAME "QGEMMT "
#elif defined(DOUBLE)
#define ERROR_NAME "DGEMT "
#define ERROR_NAME "DGEMMT "
#elif defined(BFLOAT16)
#define ERROR_NAME "SBGEMT "
#define ERROR_NAME "SBGEMMT "
#else
#define ERROR_NAME "SGEMT "
#define ERROR_NAME "SGEMMT "
#endif
#else
#define SMP_THRESHOLD_MIN 8192.0
#ifdef XDOUBLE
#define ERROR_NAME "XGEMT "
#define ERROR_NAME "XGEMMT "
#elif defined(DOUBLE)
#define ERROR_NAME "ZGEMT "
#define ERROR_NAME "ZGEMMT "
#else
#define ERROR_NAME "CGEMT "
#define ERROR_NAME "CGEMMT "
#endif
#endif
@ -68,13 +65,13 @@
#ifndef CBLAS
void NAME(char *UPLO, char *TRANSA, char *TRANSB,
blasint * M, blasint * N, blasint * K,
blasint * M, blasint * K,
FLOAT * Alpha,
IFLOAT * a, blasint * ldA,
IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC)
{
blasint m, n, k;
blasint m, k;
blasint lda, ldb, ldc;
int transa, transb, uplo;
blasint info;
@ -92,7 +89,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB,
PRINT_DEBUG_NAME;
m = *M;
n = *N;
k = *K;
#if defined(COMPLEX)
@ -167,8 +163,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB,
info = 13;
if (k < 0)
info = 5;
if (n < 0)
info = 4;
if (m < 0)
info = 3;
if (transb < 0)
@ -184,7 +178,7 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB,
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M,
blasint N, blasint k,
blasint k,
#ifndef COMPLEX
FLOAT alpha,
IFLOAT * A, blasint LDA,
@ -205,7 +199,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
int transa, transb, uplo;
blasint info;
blasint m, n, lda, ldb;
blasint m, lda, ldb;
FLOAT *a, *b;
XFLOAT *buffer;
@ -248,9 +242,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
transb = 3;
#endif
m = M;
n = N;
a = (void *)A;
b = (void *)B;
lda = LDA;
@ -262,8 +253,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
info = 13;
if (k < 0)
info = 5;
if (n < 0)
info = 4;
if (m < 0)
info = 3;
if (transb < 0)
@ -273,8 +262,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
}
if (order == CblasRowMajor) {
m = N;
n = M;
a = (void *)B;
b = (void *)A;
@ -319,8 +306,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
info = 13;
if (k < 0)
info = 5;
if (n < 0)
info = 4;
if (m < 0)
info = 3;
if (transb < 0)
@ -407,37 +392,35 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
#endif
if ((m == 0) || (n == 0))
if ((m == 0) )
return;
IDEBUG_START;
FUNCTION_PROFILE_START();
const blasint incb = (transb == 0) ? 1 : ldb;
if (uplo == 1) {
for (i = 0; i < n; i++) {
j = n - i;
for (i = 0; i < m; i++) {
j = m - i;
l = j;
#if defined(COMPLEX)
aa = a + i * 2;
bb = b + i * ldb * 2;
if (transa) {
l = k;
aa = a + lda * i * 2;
bb = b + i * 2;
}
if (transb)
bb = b + i * 2;
cc = c + i * 2 * ldc + i * 2;
#else
aa = a + i;
bb = b + i * ldb;
if (transa) {
l = k;
aa = a + lda * i;
bb = b + i;
}
if (transb)
bb = b + i;
cc = c + i * ldc + i;
#endif
@ -458,8 +441,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
IDEBUG_START;
FUNCTION_PROFILE_START();
buffer_size = j + k + 128 / sizeof(FLOAT);
#ifdef WINDOWS_ABI
buffer_size += 160 / sizeof(FLOAT);
@ -479,20 +460,34 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
#endif
#if defined(COMPLEX)
if (!transa)
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i,
aa, lda, bb, incb, cc, 1,
buffer);
else
(gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i,
aa, lda, bb, incb, cc, 1,
buffer);
#else
if (!transa)
(gemv[(int)transa]) (j, k, 0, alpha, aa, lda,
bb, incb, cc, 1, buffer);
else
(gemv[(int)transa]) (k, j, 0, alpha, aa, lda,
bb, incb, cc, 1, buffer);
#endif
#ifdef SMP
} else {
if (!transa)
(gemv_thread[(int)transa]) (j, k, alpha, aa,
lda, bb, incb, cc,
1, buffer,
nthreads);
else
(gemv_thread[(int)transa]) (k, j, alpha, aa,
lda, bb, incb, cc,
1, buffer,
nthreads);
}
#endif
@ -501,21 +496,19 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
}
} else {
for (i = 0; i < n; i++) {
for (i = 0; i < m; i++) {
j = i + 1;
l = j;
#if defined COMPLEX
bb = b + i * ldb * 2;
if (transa) {
l = k;
if (transb) {
bb = b + i * 2;
}
cc = c + i * 2 * ldc;
#else
bb = b + i * ldb;
if (transa) {
l = k;
if (transb) {
bb = b + i;
}
cc = c + i * ldc;
@ -537,8 +530,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
#endif
IDEBUG_START;
FUNCTION_PROFILE_START();
buffer_size = j + k + 128 / sizeof(FLOAT);
#ifdef WINDOWS_ABI
buffer_size += 160 / sizeof(FLOAT);
@ -558,30 +549,39 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
#endif
#if defined(COMPLEX)
if (!transa)
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i,
a, lda, bb, incb, cc, 1,
buffer);
else
(gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i,
a, lda, bb, incb, cc, 1,
buffer);
#else
if (!transa)
(gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb,
incb, cc, 1, buffer);
else
(gemv[(int)transa]) (k, j, 0, alpha, a, lda, bb,
incb, cc, 1, buffer);
#endif
#ifdef SMP
} else {
if (!transa)
(gemv_thread[(int)transa]) (j, k, alpha, a, lda,
bb, incb, cc, 1,
buffer, nthreads);
else
(gemv_thread[(int)transa]) (k, j, alpha, a, lda,
bb, incb, cc, 1,
buffer, nthreads);
}
#endif
STACK_FREE(buffer);
}
}
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE,
args.m * args.k + args.k * args.n +
args.m * args.n, 2 * args.m * args.n * args.k);
IDEBUG_END;