diff --git a/common_interface.h b/common_interface.h index 61a82c306..5a2e1654c 100644 --- a/common_interface.h +++ b/common_interface.h @@ -773,8 +773,8 @@ xdouble BLASFUNC(qlamc3)(xdouble *, xdouble *); void BLASFUNC(saxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *); void BLASFUNC(daxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *); -void BLASFUNC(caxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *); -void BLASFUNC(zaxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *); +void BLASFUNC(caxpby) (blasint *, void *, float *, blasint *, void *, float *, blasint *); +void BLASFUNC(zaxpby) (blasint *, void *, double *, blasint *, void *, double *, blasint *); void BLASFUNC(somatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *); void BLASFUNC(domatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *); diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index 16320dc40..4c1f4a26e 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -69,7 +69,7 @@ static int cpuid(void) else if (arch == POWER_9) return CPU_POWER9; #endif #ifdef POWER_10 - else if (arch == POWER_10) return CPU_POWER10; + else if (arch >= POWER_10) return CPU_POWER10; #endif return CPU_UNKNOWN; } @@ -339,6 +339,9 @@ void gotoblas_dynamic_init(void) { if (gotoblas && gotoblas -> init) { strncpy(coren,gotoblas_corename(),20); sprintf(coremsg, "Core: %s\n",coren); + if (getenv("GET_OPENBLAS_CORETYPE")) { + fprintf(stderr, "%s", coremsg); + } openblas_warning(2, coremsg); gotoblas -> init(); } else { diff --git a/exports/gensymbol b/exports/gensymbol index 704eab06f..226035842 100755 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -60,6 +60,7 @@ cblasobjsc=" cblas_ctbsv cblas_ctpmv cblas_ctpsv cblas_ctrmm cblas_ctrmv cblas_ctrsm cblas_ctrsv cblas_scnrm2 cblas_scasum cblas_cgemmt cblas_icamax cblas_icamin cblas_icmin cblas_icmax cblas_scsum cblas_cimatcopy cblas_comatcopy + cblas_caxpyc cblas_crotg cblas_csrot cblas_scamax cblas_scamin " cblasobjsd=" cblas_dasum cblas_daxpy cblas_dcopy cblas_ddot @@ -69,6 +70,7 @@ cblasobjsd=" cblas_dsyr2k cblas_dsyr cblas_dsyrk cblas_dtbmv cblas_dtbsv cblas_dtpmv cblas_dtpsv cblas_dtrmm cblas_dtrmv cblas_dtrsm cblas_dtrsv cblas_daxpby cblas_dgeadd cblas_dgemmt cblas_idamax cblas_idamin cblas_idmin cblas_idmax cblas_dsum cblas_dimatcopy cblas_domatcopy + cblas_damax cblas_damin " cblasobjss=" @@ -80,6 +82,7 @@ cblasobjss=" cblas_stbmv cblas_stbsv cblas_stpmv cblas_stpsv cblas_strmm cblas_strmv cblas_strsm cblas_strsv cblas_sgeadd cblas_sgemmt cblas_isamax cblas_isamin cblas_ismin cblas_ismax cblas_ssum cblas_simatcopy cblas_somatcopy + cblas_samax cblas_samin " cblasobjsz=" @@ -91,6 +94,7 @@ cblasobjsz=" cblas_ztrsv cblas_cdotc_sub cblas_cdotu_sub cblas_zdotc_sub cblas_zdotu_sub cblas_zaxpby cblas_zgeadd cblas_zgemmt cblas_izamax cblas_izamin cblas_izmin cblas_izmax cblas_dzsum cblas_zimatcopy cblas_zomatcopy + cblas_zaxpyc cblas_zdrot cblas_zrotg cblas_dzamax cblas_dzamin " cblasobjs="cblas_xerbla" @@ -861,6 +865,53 @@ lapackobjs2z="$lapackobjs2z zgedmd zgedmdq " + +#functions added post 3.11 + +lapackobjs2c="$lapackobjs2c + claqp2rk + claqp3rk + ctrsyl3 + " +# claqz0 +# claqz1 +# claqz2 +# claqz3 +# clatrs3 + +lapackobjs2d="$lapackobjs2d + dgelqs + dgelst + dgeqp3rk + dgeqrs + dlaqp2rk + dlaqp3rk + dlarmm + dlatrs3 + dtrsyl3 + " +# dlaqz0 +# dlaqz1 +# dlaqz2 +# dlaqz3 +# dlaqz4 + +lapackobjs2z="$lapackobjs2z + zgelqs + zgelst + zgeqp3rk + zgeqrs + zlaqp2rk + zlaqp3rk + zlatrs3 + zrscl + ztrsyl3 + " +# zlaqz0 +# zlaqz1 +# zlaqz2 +# zlaqz3 + lapack_extendedprecision_objs=" zposvxx clagge clatms chesvxx cposvxx cgesvxx ssyrfssx csyrfsx dlagsy dsysvxx sporfsx slatms zlatms zherfsx csysvxx @@ -1622,6 +1673,14 @@ lapackeobjsc=" LAPACKE_cgetsqrhrt_work LAPACKE_cungtsqr_row LAPACKE_cungtsqr_row_work + LAPACKE_clangb + LAPACKE_clangb_work + LAPACKE_ctrsyl3 + LAPACKE_ctrsyl3_work + LAPACKE_ctz_nancheck + LAPACKE_ctz_trans + LAPACKE_cunhr_col + LAPACKE_cunhr_col_work " lapackeobjsd=" @@ -2239,6 +2298,14 @@ lapackeobjsd=" LAPACKE_dgetsqrhrt_work LAPACKE_dorgtsqr_row LAPACKE_dorgtsqr_row_work + LAPACKE_dlangb + LAPACKE_dlangb_work + LAPACKE_dorhr_col + LAPACKE_dorhr_col_work + LAPACKE_dtrsyl3 + LAPACKE_dtrsyl3_work + LAPACKE_dtz_nancheck + LAPACKE_dtz_trans " lapackeobjss=" @@ -2848,6 +2915,14 @@ lapackeobjss=" LAPACKE_sgetsqrhrt_work LAPACKE_sorgtsqr_row LAPACKE_sorgtsqr_row_work + LAPACKE_slangb + LAPACKE_slangb_work + LAPACKE_sorhr_col + LAPACKE_sorhr_col_work + LAPACKE_strsyl3 + LAPACKE_strsyl3_work + LAPACKE_stz_nancheck + LAPACKE_stz_trans " lapackeobjsz=" @@ -3515,6 +3590,14 @@ lapackeobjsz=" LAPACKE_zgetsqrhrt_work LAPACKE_zungtsqr_row LAPACKE_zungtsqr_row_work + LAPACKE_zlangb + LAPACKE_zlangb_work + LAPACKE_ztrsyl3 + LAPACKE_ztrsyl3_work + LAPACKE_ztz_nancheck + LAPACKE_ztz_trans + LAPACKE_zunhr_col + LAPACKE_zunhr_col_work " ## @(SRCX_OBJ) from `lapack-3.4.1/lapacke/src/Makefile` ## Not exported: requires LAPACKE_EXTENDED to be set and depends on the @@ -3616,6 +3699,7 @@ lapack_embeded_underscore_objs_s=" ssysv_aa_2stage ssytrf_aa_2stage ssytrs_aa_2stage slaorhr_col_getrfnp slaorhr_col_getrfnp2 sorhr_col + slarfb_gett " lapack_embeded_underscore_objs_c=" chetf2_rook chetrf_rook chetri_rook @@ -3641,6 +3725,7 @@ lapack_embeded_underscore_objs_c=" csysv_aa_2stage csytrf_aa_2stage csytrs_aa_2stage claunhr_col_getrfnp claunhr_col_getrfnp2 cunhr_col + clarfb_gett " lapack_embeded_underscore_objs_d=" dlasyf_rook @@ -3658,6 +3743,7 @@ lapack_embeded_underscore_objs_d=" dsysv_aa_2stage dsytrf_aa_2stage dsytrs_aa_2stage dlaorhr_col_getrfnp dlaorhr_col_getrfnp2 dorhr_col + dlarfb_gett " lapack_embeded_underscore_objs_z=" zhetf2_rook zhetrf_rook zhetri_rook @@ -3682,6 +3768,7 @@ lapack_embeded_underscore_objs_z=" zhetrs_aa_2stage zsysv_aa_2stage zsytrf_aa_2stage zsytrs_aa_2stage zlaunhr_col_getrfnp zlaunhr_col_getrfnp2 zunhr_col + zlarfb_gett " dirname=`pwd -P`/../lapack-netlib diff --git a/interface/zaxpby.c b/interface/zaxpby.c index 3a4db7403..e5065270d 100644 --- a/interface/zaxpby.c +++ b/interface/zaxpby.c @@ -39,12 +39,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef CBLAS -void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY) +void NAME(blasint *N, void *VALPHA, FLOAT *x, blasint *INCX, void *VBETA, FLOAT *y, blasint *INCY) { blasint n = *N; blasint incx = *INCX; blasint incy = *INCY; + FLOAT* ALPHA = (FLOAT*) VALPHA; + FLOAT* BETA = (FLOAT*) VBETA; #else diff --git a/test/compare_sgemm_sbgemm.c b/test/compare_sgemm_sbgemm.c index 4afa8bf93..bc74233ab 100644 --- a/test/compare_sgemm_sbgemm.c +++ b/test/compare_sgemm_sbgemm.c @@ -81,16 +81,6 @@ float16to32 (bfloat16_bits f16) return f32.v; } -float -float32to16 (float32_bits f32) -{ - bfloat16_bits f16; - f16.bits.s = f32.bits.s; - f16.bits.e = f32.bits.e; - f16.bits.m = (uint32_t) f32.bits.m >> 16; - return f32.v; -} - int main (int argc, char *argv[]) { @@ -110,6 +100,8 @@ main (int argc, char *argv[]) float C[m * n]; bfloat16_bits AA[m * k], BB[k * n]; float DD[m * n], CC[m * n]; + bfloat16 atmp,btmp; + blasint one=1; for (j = 0; j < m; j++) { @@ -118,8 +110,10 @@ main (int argc, char *argv[]) A[j * k + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5; B[j * k + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5; C[j * k + i] = 0; - AA[j * k + i].v = float32to16( A[j * k + i] ); - BB[j * k + i].v = float32to16( B[j * k + i] ); + sbstobf16_(&one, &A[j*k+i], &one, &atmp, &one); + sbstobf16_(&one, &B[j*k+i], &one, &btmp, &one); + AA[j * k + i].v = atmp; + BB[j * k + i].v = btmp; CC[j * k + i] = 0; DD[j * k + i] = 0; }