Merge branch 'develop' of https://github.com/openmathlib/openblas into develop
This commit is contained in:
commit
32ed6e391a
|
@ -773,8 +773,8 @@ xdouble BLASFUNC(qlamc3)(xdouble *, xdouble *);
|
||||||
|
|
||||||
void BLASFUNC(saxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *);
|
void BLASFUNC(saxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *);
|
||||||
void BLASFUNC(daxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *);
|
void BLASFUNC(daxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *);
|
||||||
void BLASFUNC(caxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *);
|
void BLASFUNC(caxpby) (blasint *, void *, float *, blasint *, void *, float *, blasint *);
|
||||||
void BLASFUNC(zaxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *);
|
void BLASFUNC(zaxpby) (blasint *, void *, double *, blasint *, void *, double *, blasint *);
|
||||||
|
|
||||||
void BLASFUNC(somatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *);
|
void BLASFUNC(somatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *);
|
||||||
void BLASFUNC(domatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *);
|
void BLASFUNC(domatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *);
|
||||||
|
|
|
@ -69,7 +69,7 @@ static int cpuid(void)
|
||||||
else if (arch == POWER_9) return CPU_POWER9;
|
else if (arch == POWER_9) return CPU_POWER9;
|
||||||
#endif
|
#endif
|
||||||
#ifdef POWER_10
|
#ifdef POWER_10
|
||||||
else if (arch == POWER_10) return CPU_POWER10;
|
else if (arch >= POWER_10) return CPU_POWER10;
|
||||||
#endif
|
#endif
|
||||||
return CPU_UNKNOWN;
|
return CPU_UNKNOWN;
|
||||||
}
|
}
|
||||||
|
@ -339,6 +339,9 @@ void gotoblas_dynamic_init(void) {
|
||||||
if (gotoblas && gotoblas -> init) {
|
if (gotoblas && gotoblas -> init) {
|
||||||
strncpy(coren,gotoblas_corename(),20);
|
strncpy(coren,gotoblas_corename(),20);
|
||||||
sprintf(coremsg, "Core: %s\n",coren);
|
sprintf(coremsg, "Core: %s\n",coren);
|
||||||
|
if (getenv("GET_OPENBLAS_CORETYPE")) {
|
||||||
|
fprintf(stderr, "%s", coremsg);
|
||||||
|
}
|
||||||
openblas_warning(2, coremsg);
|
openblas_warning(2, coremsg);
|
||||||
gotoblas -> init();
|
gotoblas -> init();
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -60,6 +60,7 @@ cblasobjsc="
|
||||||
cblas_ctbsv cblas_ctpmv cblas_ctpsv cblas_ctrmm cblas_ctrmv cblas_ctrsm cblas_ctrsv
|
cblas_ctbsv cblas_ctpmv cblas_ctpsv cblas_ctrmm cblas_ctrmv cblas_ctrsm cblas_ctrsv
|
||||||
cblas_scnrm2 cblas_scasum cblas_cgemmt
|
cblas_scnrm2 cblas_scasum cblas_cgemmt
|
||||||
cblas_icamax cblas_icamin cblas_icmin cblas_icmax cblas_scsum cblas_cimatcopy cblas_comatcopy
|
cblas_icamax cblas_icamin cblas_icmin cblas_icmax cblas_scsum cblas_cimatcopy cblas_comatcopy
|
||||||
|
cblas_caxpyc cblas_crotg cblas_csrot cblas_scamax cblas_scamin
|
||||||
"
|
"
|
||||||
cblasobjsd="
|
cblasobjsd="
|
||||||
cblas_dasum cblas_daxpy cblas_dcopy cblas_ddot
|
cblas_dasum cblas_daxpy cblas_dcopy cblas_ddot
|
||||||
|
@ -69,6 +70,7 @@ cblasobjsd="
|
||||||
cblas_dsyr2k cblas_dsyr cblas_dsyrk cblas_dtbmv cblas_dtbsv cblas_dtpmv cblas_dtpsv
|
cblas_dsyr2k cblas_dsyr cblas_dsyrk cblas_dtbmv cblas_dtbsv cblas_dtpmv cblas_dtpsv
|
||||||
cblas_dtrmm cblas_dtrmv cblas_dtrsm cblas_dtrsv cblas_daxpby cblas_dgeadd cblas_dgemmt
|
cblas_dtrmm cblas_dtrmv cblas_dtrsm cblas_dtrsv cblas_daxpby cblas_dgeadd cblas_dgemmt
|
||||||
cblas_idamax cblas_idamin cblas_idmin cblas_idmax cblas_dsum cblas_dimatcopy cblas_domatcopy
|
cblas_idamax cblas_idamin cblas_idmin cblas_idmax cblas_dsum cblas_dimatcopy cblas_domatcopy
|
||||||
|
cblas_damax cblas_damin
|
||||||
"
|
"
|
||||||
|
|
||||||
cblasobjss="
|
cblasobjss="
|
||||||
|
@ -80,6 +82,7 @@ cblasobjss="
|
||||||
cblas_stbmv cblas_stbsv cblas_stpmv cblas_stpsv cblas_strmm cblas_strmv cblas_strsm
|
cblas_stbmv cblas_stbsv cblas_stpmv cblas_stpsv cblas_strmm cblas_strmv cblas_strsm
|
||||||
cblas_strsv cblas_sgeadd cblas_sgemmt
|
cblas_strsv cblas_sgeadd cblas_sgemmt
|
||||||
cblas_isamax cblas_isamin cblas_ismin cblas_ismax cblas_ssum cblas_simatcopy cblas_somatcopy
|
cblas_isamax cblas_isamin cblas_ismin cblas_ismax cblas_ssum cblas_simatcopy cblas_somatcopy
|
||||||
|
cblas_samax cblas_samin
|
||||||
"
|
"
|
||||||
|
|
||||||
cblasobjsz="
|
cblasobjsz="
|
||||||
|
@ -91,6 +94,7 @@ cblasobjsz="
|
||||||
cblas_ztrsv cblas_cdotc_sub cblas_cdotu_sub cblas_zdotc_sub cblas_zdotu_sub
|
cblas_ztrsv cblas_cdotc_sub cblas_cdotu_sub cblas_zdotc_sub cblas_zdotu_sub
|
||||||
cblas_zaxpby cblas_zgeadd cblas_zgemmt
|
cblas_zaxpby cblas_zgeadd cblas_zgemmt
|
||||||
cblas_izamax cblas_izamin cblas_izmin cblas_izmax cblas_dzsum cblas_zimatcopy cblas_zomatcopy
|
cblas_izamax cblas_izamin cblas_izmin cblas_izmax cblas_dzsum cblas_zimatcopy cblas_zomatcopy
|
||||||
|
cblas_zaxpyc cblas_zdrot cblas_zrotg cblas_dzamax cblas_dzamin
|
||||||
"
|
"
|
||||||
|
|
||||||
cblasobjs="cblas_xerbla"
|
cblasobjs="cblas_xerbla"
|
||||||
|
@ -861,6 +865,53 @@ lapackobjs2z="$lapackobjs2z
|
||||||
zgedmd
|
zgedmd
|
||||||
zgedmdq
|
zgedmdq
|
||||||
"
|
"
|
||||||
|
|
||||||
|
#functions added post 3.11
|
||||||
|
|
||||||
|
lapackobjs2c="$lapackobjs2c
|
||||||
|
claqp2rk
|
||||||
|
claqp3rk
|
||||||
|
ctrsyl3
|
||||||
|
"
|
||||||
|
# claqz0
|
||||||
|
# claqz1
|
||||||
|
# claqz2
|
||||||
|
# claqz3
|
||||||
|
# clatrs3
|
||||||
|
|
||||||
|
lapackobjs2d="$lapackobjs2d
|
||||||
|
dgelqs
|
||||||
|
dgelst
|
||||||
|
dgeqp3rk
|
||||||
|
dgeqrs
|
||||||
|
dlaqp2rk
|
||||||
|
dlaqp3rk
|
||||||
|
dlarmm
|
||||||
|
dlatrs3
|
||||||
|
dtrsyl3
|
||||||
|
"
|
||||||
|
# dlaqz0
|
||||||
|
# dlaqz1
|
||||||
|
# dlaqz2
|
||||||
|
# dlaqz3
|
||||||
|
# dlaqz4
|
||||||
|
|
||||||
|
lapackobjs2z="$lapackobjs2z
|
||||||
|
zgelqs
|
||||||
|
zgelst
|
||||||
|
zgeqp3rk
|
||||||
|
zgeqrs
|
||||||
|
zlaqp2rk
|
||||||
|
zlaqp3rk
|
||||||
|
zlatrs3
|
||||||
|
zrscl
|
||||||
|
ztrsyl3
|
||||||
|
"
|
||||||
|
# zlaqz0
|
||||||
|
# zlaqz1
|
||||||
|
# zlaqz2
|
||||||
|
# zlaqz3
|
||||||
|
|
||||||
lapack_extendedprecision_objs="
|
lapack_extendedprecision_objs="
|
||||||
zposvxx clagge clatms chesvxx cposvxx cgesvxx ssyrfssx csyrfsx
|
zposvxx clagge clatms chesvxx cposvxx cgesvxx ssyrfssx csyrfsx
|
||||||
dlagsy dsysvxx sporfsx slatms zlatms zherfsx csysvxx
|
dlagsy dsysvxx sporfsx slatms zlatms zherfsx csysvxx
|
||||||
|
@ -1622,6 +1673,14 @@ lapackeobjsc="
|
||||||
LAPACKE_cgetsqrhrt_work
|
LAPACKE_cgetsqrhrt_work
|
||||||
LAPACKE_cungtsqr_row
|
LAPACKE_cungtsqr_row
|
||||||
LAPACKE_cungtsqr_row_work
|
LAPACKE_cungtsqr_row_work
|
||||||
|
LAPACKE_clangb
|
||||||
|
LAPACKE_clangb_work
|
||||||
|
LAPACKE_ctrsyl3
|
||||||
|
LAPACKE_ctrsyl3_work
|
||||||
|
LAPACKE_ctz_nancheck
|
||||||
|
LAPACKE_ctz_trans
|
||||||
|
LAPACKE_cunhr_col
|
||||||
|
LAPACKE_cunhr_col_work
|
||||||
"
|
"
|
||||||
|
|
||||||
lapackeobjsd="
|
lapackeobjsd="
|
||||||
|
@ -2239,6 +2298,14 @@ lapackeobjsd="
|
||||||
LAPACKE_dgetsqrhrt_work
|
LAPACKE_dgetsqrhrt_work
|
||||||
LAPACKE_dorgtsqr_row
|
LAPACKE_dorgtsqr_row
|
||||||
LAPACKE_dorgtsqr_row_work
|
LAPACKE_dorgtsqr_row_work
|
||||||
|
LAPACKE_dlangb
|
||||||
|
LAPACKE_dlangb_work
|
||||||
|
LAPACKE_dorhr_col
|
||||||
|
LAPACKE_dorhr_col_work
|
||||||
|
LAPACKE_dtrsyl3
|
||||||
|
LAPACKE_dtrsyl3_work
|
||||||
|
LAPACKE_dtz_nancheck
|
||||||
|
LAPACKE_dtz_trans
|
||||||
"
|
"
|
||||||
|
|
||||||
lapackeobjss="
|
lapackeobjss="
|
||||||
|
@ -2848,6 +2915,14 @@ lapackeobjss="
|
||||||
LAPACKE_sgetsqrhrt_work
|
LAPACKE_sgetsqrhrt_work
|
||||||
LAPACKE_sorgtsqr_row
|
LAPACKE_sorgtsqr_row
|
||||||
LAPACKE_sorgtsqr_row_work
|
LAPACKE_sorgtsqr_row_work
|
||||||
|
LAPACKE_slangb
|
||||||
|
LAPACKE_slangb_work
|
||||||
|
LAPACKE_sorhr_col
|
||||||
|
LAPACKE_sorhr_col_work
|
||||||
|
LAPACKE_strsyl3
|
||||||
|
LAPACKE_strsyl3_work
|
||||||
|
LAPACKE_stz_nancheck
|
||||||
|
LAPACKE_stz_trans
|
||||||
"
|
"
|
||||||
|
|
||||||
lapackeobjsz="
|
lapackeobjsz="
|
||||||
|
@ -3515,6 +3590,14 @@ lapackeobjsz="
|
||||||
LAPACKE_zgetsqrhrt_work
|
LAPACKE_zgetsqrhrt_work
|
||||||
LAPACKE_zungtsqr_row
|
LAPACKE_zungtsqr_row
|
||||||
LAPACKE_zungtsqr_row_work
|
LAPACKE_zungtsqr_row_work
|
||||||
|
LAPACKE_zlangb
|
||||||
|
LAPACKE_zlangb_work
|
||||||
|
LAPACKE_ztrsyl3
|
||||||
|
LAPACKE_ztrsyl3_work
|
||||||
|
LAPACKE_ztz_nancheck
|
||||||
|
LAPACKE_ztz_trans
|
||||||
|
LAPACKE_zunhr_col
|
||||||
|
LAPACKE_zunhr_col_work
|
||||||
"
|
"
|
||||||
## @(SRCX_OBJ) from `lapack-3.4.1/lapacke/src/Makefile`
|
## @(SRCX_OBJ) from `lapack-3.4.1/lapacke/src/Makefile`
|
||||||
## Not exported: requires LAPACKE_EXTENDED to be set and depends on the
|
## Not exported: requires LAPACKE_EXTENDED to be set and depends on the
|
||||||
|
@ -3616,6 +3699,7 @@ lapack_embeded_underscore_objs_s="
|
||||||
ssysv_aa_2stage ssytrf_aa_2stage
|
ssysv_aa_2stage ssytrf_aa_2stage
|
||||||
ssytrs_aa_2stage
|
ssytrs_aa_2stage
|
||||||
slaorhr_col_getrfnp slaorhr_col_getrfnp2 sorhr_col
|
slaorhr_col_getrfnp slaorhr_col_getrfnp2 sorhr_col
|
||||||
|
slarfb_gett
|
||||||
"
|
"
|
||||||
lapack_embeded_underscore_objs_c="
|
lapack_embeded_underscore_objs_c="
|
||||||
chetf2_rook chetrf_rook chetri_rook
|
chetf2_rook chetrf_rook chetri_rook
|
||||||
|
@ -3641,6 +3725,7 @@ lapack_embeded_underscore_objs_c="
|
||||||
csysv_aa_2stage csytrf_aa_2stage
|
csysv_aa_2stage csytrf_aa_2stage
|
||||||
csytrs_aa_2stage
|
csytrs_aa_2stage
|
||||||
claunhr_col_getrfnp claunhr_col_getrfnp2 cunhr_col
|
claunhr_col_getrfnp claunhr_col_getrfnp2 cunhr_col
|
||||||
|
clarfb_gett
|
||||||
"
|
"
|
||||||
lapack_embeded_underscore_objs_d="
|
lapack_embeded_underscore_objs_d="
|
||||||
dlasyf_rook
|
dlasyf_rook
|
||||||
|
@ -3658,6 +3743,7 @@ lapack_embeded_underscore_objs_d="
|
||||||
dsysv_aa_2stage
|
dsysv_aa_2stage
|
||||||
dsytrf_aa_2stage dsytrs_aa_2stage
|
dsytrf_aa_2stage dsytrs_aa_2stage
|
||||||
dlaorhr_col_getrfnp dlaorhr_col_getrfnp2 dorhr_col
|
dlaorhr_col_getrfnp dlaorhr_col_getrfnp2 dorhr_col
|
||||||
|
dlarfb_gett
|
||||||
"
|
"
|
||||||
lapack_embeded_underscore_objs_z="
|
lapack_embeded_underscore_objs_z="
|
||||||
zhetf2_rook zhetrf_rook zhetri_rook
|
zhetf2_rook zhetrf_rook zhetri_rook
|
||||||
|
@ -3682,6 +3768,7 @@ lapack_embeded_underscore_objs_z="
|
||||||
zhetrs_aa_2stage zsysv_aa_2stage
|
zhetrs_aa_2stage zsysv_aa_2stage
|
||||||
zsytrf_aa_2stage zsytrs_aa_2stage
|
zsytrf_aa_2stage zsytrs_aa_2stage
|
||||||
zlaunhr_col_getrfnp zlaunhr_col_getrfnp2 zunhr_col
|
zlaunhr_col_getrfnp zlaunhr_col_getrfnp2 zunhr_col
|
||||||
|
zlarfb_gett
|
||||||
"
|
"
|
||||||
|
|
||||||
dirname=`pwd -P`/../lapack-netlib
|
dirname=`pwd -P`/../lapack-netlib
|
||||||
|
|
|
@ -39,12 +39,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#ifndef CBLAS
|
#ifndef CBLAS
|
||||||
|
|
||||||
void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY)
|
void NAME(blasint *N, void *VALPHA, FLOAT *x, blasint *INCX, void *VBETA, FLOAT *y, blasint *INCY)
|
||||||
{
|
{
|
||||||
|
|
||||||
blasint n = *N;
|
blasint n = *N;
|
||||||
blasint incx = *INCX;
|
blasint incx = *INCX;
|
||||||
blasint incy = *INCY;
|
blasint incy = *INCY;
|
||||||
|
FLOAT* ALPHA = (FLOAT*) VALPHA;
|
||||||
|
FLOAT* BETA = (FLOAT*) VBETA;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
|
|
@ -81,16 +81,6 @@ float16to32 (bfloat16_bits f16)
|
||||||
return f32.v;
|
return f32.v;
|
||||||
}
|
}
|
||||||
|
|
||||||
float
|
|
||||||
float32to16 (float32_bits f32)
|
|
||||||
{
|
|
||||||
bfloat16_bits f16;
|
|
||||||
f16.bits.s = f32.bits.s;
|
|
||||||
f16.bits.e = f32.bits.e;
|
|
||||||
f16.bits.m = (uint32_t) f32.bits.m >> 16;
|
|
||||||
return f32.v;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
int
|
||||||
main (int argc, char *argv[])
|
main (int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
@ -110,6 +100,8 @@ main (int argc, char *argv[])
|
||||||
float C[m * n];
|
float C[m * n];
|
||||||
bfloat16_bits AA[m * k], BB[k * n];
|
bfloat16_bits AA[m * k], BB[k * n];
|
||||||
float DD[m * n], CC[m * n];
|
float DD[m * n], CC[m * n];
|
||||||
|
bfloat16 atmp,btmp;
|
||||||
|
blasint one=1;
|
||||||
|
|
||||||
for (j = 0; j < m; j++)
|
for (j = 0; j < m; j++)
|
||||||
{
|
{
|
||||||
|
@ -118,8 +110,10 @@ main (int argc, char *argv[])
|
||||||
A[j * k + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
A[j * k + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
||||||
B[j * k + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
B[j * k + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
||||||
C[j * k + i] = 0;
|
C[j * k + i] = 0;
|
||||||
AA[j * k + i].v = float32to16( A[j * k + i] );
|
sbstobf16_(&one, &A[j*k+i], &one, &atmp, &one);
|
||||||
BB[j * k + i].v = float32to16( B[j * k + i] );
|
sbstobf16_(&one, &B[j*k+i], &one, &btmp, &one);
|
||||||
|
AA[j * k + i].v = atmp;
|
||||||
|
BB[j * k + i].v = btmp;
|
||||||
CC[j * k + i] = 0;
|
CC[j * k + i] = 0;
|
||||||
DD[j * k + i] = 0;
|
DD[j * k + i] = 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue