From 7d6c85f9da82f10615daefc9135a2616a4347855 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 18 Oct 2020 19:27:51 +0200 Subject: [PATCH 01/10] Add compiler option -mmma for POWER10 --- Makefile.power | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.power b/Makefile.power index e766f8499..59af8ef55 100644 --- a/Makefile.power +++ b/Makefile.power @@ -10,7 +10,7 @@ USE_OPENMP = 1 endif ifeq ($(CORE), POWER10) -COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math +COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -mmma -fno-fast-math FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math endif From d85b24e10320c292c9e3b0f8eff24c032411eeb7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 18 Oct 2020 19:29:45 +0200 Subject: [PATCH 02/10] Clean up STACKSIZE redefinition --- kernel/power/dtrmm_kernel_16x4_power8.S | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/power/dtrmm_kernel_16x4_power8.S b/kernel/power/dtrmm_kernel_16x4_power8.S index 84c65f503..91154ad37 100644 --- a/kernel/power/dtrmm_kernel_16x4_power8.S +++ b/kernel/power/dtrmm_kernel_16x4_power8.S @@ -82,7 +82,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif #ifdef __64BIT__ -#define STACKSIZE 320 #define STACKSIZE 520 #define ALPHA_SP 296+200(SP) #define FZERO 304+200(SP) From c1422f3e4624f1733bcc0896a491bf32bc2c1b97 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 18 Oct 2020 19:31:01 +0200 Subject: [PATCH 03/10] Clean up STACKSIZE redefinition --- kernel/power/dtrsm_kernel_LT_16x4_power8.S | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/power/dtrsm_kernel_LT_16x4_power8.S b/kernel/power/dtrsm_kernel_LT_16x4_power8.S index 8a423f181..5b349db12 100644 --- a/kernel/power/dtrsm_kernel_LT_16x4_power8.S +++ b/kernel/power/dtrsm_kernel_LT_16x4_power8.S @@ -47,7 +47,6 @@ #endif #ifdef __64BIT__ -#define STACKSIZE 320 #define STACKSIZE 520 #define ALPHA 296+200(SP) #define FZERO 304+200(SP) From 17e288e18d0f308d0edccf6e53ac34a4029d4e46 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 18 Oct 2020 19:37:04 +0200 Subject: [PATCH 04/10] Clean up STACKSIZE redefinition --- kernel/power/ctrmm_kernel_8x4_power8.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/power/ctrmm_kernel_8x4_power8.S b/kernel/power/ctrmm_kernel_8x4_power8.S index 822420dfd..35faad19e 100644 --- a/kernel/power/ctrmm_kernel_8x4_power8.S +++ b/kernel/power/ctrmm_kernel_8x4_power8.S @@ -82,12 +82,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif #ifdef __64BIT__ -#define STACKSIZE 400 #define STACKSIZE 592 #define ALPHA_R_SP 304+192(SP) #define ALPHA_I_SP 312+192(SP) #else -#define STACKSIZE 256 #define STACKSIZE 452 #define ALPHA_R_SP 224+196(SP) #define ALPHA_I_SP 232+196(SP) From 97cf10062f328afa1d1a3a4700839a46d7fe6214 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 18 Oct 2020 19:39:18 +0200 Subject: [PATCH 05/10] Clean up STACKSIZE redefinition --- kernel/power/strmm_kernel_16x8_power8.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/power/strmm_kernel_16x8_power8.S b/kernel/power/strmm_kernel_16x8_power8.S index 78e539231..a8182b5aa 100644 --- a/kernel/power/strmm_kernel_16x8_power8.S +++ b/kernel/power/strmm_kernel_16x8_power8.S @@ -12,7 +12,7 @@ the documentation and/or other materials provided with the distribution. 3. Neither the name of the OpenBLAS project nor the names of its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. +derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -82,7 +82,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif #ifdef __64BIT__ -#define STACKSIZE 340 #define STACKSIZE 540 #define ALPHA_SP 296+200(SP) #define FZERO 304+200(SP) From f1a4071d8cd6aa94ff0e86a77e6b8f29823b2751 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 18 Oct 2020 19:41:43 +0200 Subject: [PATCH 06/10] Clean up STACKSIZE redefinition --- kernel/power/dgemm_kernel_16x4_power8.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/power/dgemm_kernel_16x4_power8.S b/kernel/power/dgemm_kernel_16x4_power8.S index 651fd53fc..f8ed12ee9 100644 --- a/kernel/power/dgemm_kernel_16x4_power8.S +++ b/kernel/power/dgemm_kernel_16x4_power8.S @@ -82,12 +82,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifdef __64BIT__ -#define STACKSIZE 320 #define STACKSIZE 512 #define ALPHA_SP 296+192(SP) #define FZERO 304+192(SP) #else -#define STACKSIZE 240 #define STACKSIZE 440 #define ALPHA_SP 224+200(SP) #define FZERO 232+200(SP) From 7eddaf0d6fb861c11c425fc47b87870585a95829 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 19 Oct 2020 08:11:22 +0200 Subject: [PATCH 07/10] Remove -mmma again (redundant with cpu=power10) and add override statements --- Makefile.power | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.power b/Makefile.power index 59af8ef55..6de59c53d 100644 --- a/Makefile.power +++ b/Makefile.power @@ -10,8 +10,8 @@ USE_OPENMP = 1 endif ifeq ($(CORE), POWER10) -COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -mmma -fno-fast-math -FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math +override COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math +override FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math endif ifeq ($(CORE), POWER9) From a61c086408650f51e09dbbfcc1b72ecb33272000 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 19 Oct 2020 09:12:12 +0200 Subject: [PATCH 08/10] Fix spurious trailing whitespace in comment --- 
kernel/power/strmm_kernel_16x8_power8.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/power/strmm_kernel_16x8_power8.S b/kernel/power/strmm_kernel_16x8_power8.S index a8182b5aa..1f9912c49 100644 --- a/kernel/power/strmm_kernel_16x8_power8.S +++ b/kernel/power/strmm_kernel_16x8_power8.S @@ -12,7 +12,7 @@ the documentation and/or other materials provided with the distribution. 3. Neither the name of the OpenBLAS project nor the names of its contributors may be used to endorse or promote products -derived from this software without specific prior written permission. +derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE From fe2a922adaac599077651119c2230987a44a7fb6 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 19 Oct 2020 17:43:53 +0200 Subject: [PATCH 09/10] Add POWER10 compiler options to CCOMMON_OPT rather than COMMON_OPT --- Makefile.power | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.power b/Makefile.power index 6de59c53d..c7e972290 100644 --- a/Makefile.power +++ b/Makefile.power @@ -10,8 +10,8 @@ USE_OPENMP = 1 endif ifeq ($(CORE), POWER10) -override COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math -override FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math +CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math +FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math endif ifeq ($(CORE), POWER9) From 4ad33c46b0c4b13606653d9a461f06a22f4fd404 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 19 Oct 2020 20:37:52 +0200 Subject: [PATCH 10/10] Add back symbols that got dropped when splitting by type --- exports/gensymbol | 103 
+++++++++++++++++++++++++++++++--------------- 1 file changed, 70 insertions(+), 33 deletions(-) diff --git a/exports/gensymbol b/exports/gensymbol index e1f728790..d5ec45fad 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -51,7 +51,7 @@ zgeadd, dzsum); @blasobjs = (lsame, xerbla); -@halfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod); +@bfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod); @cblasobjsc = ( cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv, cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k, @@ -72,7 +72,7 @@ ); @cblasobjss = ( - cblas_sasum, cblas_saxpy, + cblas_sasum, cblas_saxpy, cblas_saxpby, cblas_scopy, cblas_sdot, cblas_sdsdot, cblas_sgbmv, cblas_sgemm, cblas_sgemv, cblas_sger, cblas_snrm2, cblas_srot, cblas_srotg, cblas_srotm, cblas_srotmg, cblas_ssbmv, cblas_sscal, cblas_sspmv, cblas_sspr2, cblas_sspr, @@ -92,9 +92,9 @@ cblas_izamax, cblas_izamin, cblas_izmin, cblas_izmax, cblas_dzsum,cblas_zimatcopy,cblas_zomatcopy ); -@cblasobjs = ( cblas_xerbla ); +@cblasobjs = ( cblas_xerbla ); -@halfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod); +@bfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod); @exblasobjs = ( qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm, @@ -415,7 +415,7 @@ zpotri, cgeqrt, cgeqrt2, cgeqrt3, cgemqrt, ctpqrt, ctpqrt2, ctpmqrt, ctprfb, ); -@lapack2objszc = ( +@lapackobjs2zc = ( # ZCLASRC -- Double-single mixed precision complex routines called from # single, single-extra and double precision complex LAPACK # routines (i.e. from CLASRC, CXLASRC, ZLASRC). 
@@ -425,7 +425,7 @@ zpotri, cpotrs, ); -@lapack2objsd = ( +@lapackobjs2d = ( # DLASRC -- Double precision real LAPACK routines # already provided by @lapackobjs: # dgesv, dgetf2, dgetrs, dlaswp, dlauu2, dlauum, dpotf2, dpotrf, dpotri, @@ -568,7 +568,7 @@ zpotri, ); # functions added for lapack-3.6.0 -@lapack2objsc = ( @lapack2objsc, +@lapackobjs2c = ( @lapackobjs2c, cgejsv, cgesvdx, cgesvj, @@ -604,7 +604,7 @@ zpotri, csyr2, cunm22, ); -@lapackobjs2d = (@lapack2objsd, +@lapackobjs2d = (@lapackobjs2d, dbdsvdx, dgesvdx, dgetrf2, @@ -637,7 +637,7 @@ zpotri, dpotrf2, dsecnd, ); - @lapack2objss = (@lapack2objss, + @lapackobjs2s = (@lapackobjs2s, sbdsvdx, second, sgesvdx, @@ -670,7 +670,7 @@ zpotri, sorm22, spotrf2, ); - @lapack2objsz = (@lapack2objsz, + @lapackobjs2z = (@lapackobjs2z, zgejsv, zgesvdx, zgesvj, @@ -707,7 +707,7 @@ zpotri, zunm22, ); # functions added for lapack-3.7.0 -@lapack2objss = (@lapack2objss, +@lapackobjs2s = (@lapackobjs2s, slarfy, strevc3, sgelqt, @@ -726,7 +726,7 @@ zpotri, stplqt2, stpmlqt, ); - @lapack2objsd = (@lapack2objsd, + @lapackobjs2d = (@lapackobjs2d, dlarfy, dsyconvf, dtrevc3, @@ -746,7 +746,7 @@ zpotri, dtplqt2, dtpmlqt, ); - @lapack2objsc = (@lapack2objsc, + @lapackobjs2c = (@lapackobjs2c, clarfy, csyconvf, ctrevc3, @@ -766,7 +766,7 @@ zpotri, ctplqt2, ctpmlqt, ); - @lapack2objsz = (@lapack2objsz, + @lapackobjs2z = (@lapackobjs2z, zlarfy, zsyconvf, ztrevc3, @@ -786,7 +786,7 @@ zpotri, zlamswlq, zgemlq, ); - @lapack2objs = (@lapack2objs, + @lapackobjs2 = (@lapackobjs2, sladiv1, dladiv1, iparam2stage, @@ -796,21 +796,21 @@ zpotri, ilaenv2stage, ); # functions added for lapack-3.9.0 -@lapack2objsc = (@lapack2objsc, +@lapackobjs2c = (@lapackobjs2c, cgesvdq, cungtsqr, dcombssq, ); -@lapack2objsd = (@lapack2objsd, +@lapackobjs2d = (@lapackobjs2d, dgesvdq, dorgtsqr, ); -@lapack2objss = (@lapack2objss, +@lapackobjs2s = (@lapackobjs2s, scombssq, sgesvdq, sorgtsqr, ); -@lapack2objsz = (@lapack2objsz, +@lapackobjs2z = (@lapackobjs2z, zgesvdq, 
zungtsqr ); @@ -835,10 +835,29 @@ zpotri, dlatzm, dtzrqf); @lapack_deprecated_objss = ( + sgelsx, sgegs, - sgegv, + sgegv, + sgeqpf, + sggsvd, + sggsvp, + slahrd, + slatzm, + stzrqf ); - + +@lapack_deprecated_objsz = ( + zgegs, + zgegv, + zgelsx, + zgeqpf, + zggsvd, + zggsvp, + zlahrd, + zlatzm, + ztzrqf + ); + @lapacke_deprecated_objsc = ( LAPACKE_cggsvp, LAPACKE_cggsvp_work, @@ -3590,48 +3609,66 @@ use File::Basename; my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib"); if ($ARGV[12] == 1) { - @blasobjs = (@blasobjs, @halfblasobjs); - @cblasobjs = (@cblasobjs, @halfcblasobjs); + @blasobjs = (@blasobjs, @bfblasobjs); + @cblasobjs = (@cblasobjs, @bfcblasobjs); } if ($ARGV[13] == 1) { @blasobjs = (@blasobjs, @blasobjss); @cblasobjs = (@cblasobjs, @cblasobjss); @lapackobjs = (@lapackobjs, @lapackobjss); - @lapack2objs = (@lapack2objs, @lapack2objss); + @lapackobjs2 = (@lapackobjs2, @lapackobjs2s); + @lapackobjs2 = (@lapackobjs2, @lapackobjs2sc); + @lapackobjs2 = (@lapackobjs2, @lapackobjs2ds); + @lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objss); + @lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objss); @lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s); @lapackeobjs = (@lapackeobjs, @lapackeobjss); - @lapackobjs2 = (@lapackobjs2, @lapackobjs2s); } if ($ARGV[14] == 1) { @blasobjs = (@blasobjs, @blasobjsd); @cblasobjs = (@cblasobjs, @cblasobjsd); @lapackobjs = (@lapackobjs, @lapackobjsd); - @lapack2objs = (@lapack2objs, @lapack2objsd); + if ($ARGV[13] == 0) { + @lapackobjs2 = (@lapackobjs2, @lapackobjs2ds); + } + @lapackobjs2 = (@lapackobjs2, @lapackobjs2d, @lapackobjs2dz); + @lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsd); + @lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objsd); @lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, 
@lapack_embeded_underscore_objs_d); @lapackeobjs = (@lapackeobjs, @lapackeobjsd); - @lapackobjs2 = (@lapackobjs2, @lapackobjs2d); } if ($ARGV[15] == 1) { @blasobjs = (@blasobjs, @blasobjsc); @cblasobjs = (@cblasobjs, @cblasobjsc); @gemm3mobjs = (@gemm3mobjs, @gemm3mobjsc); - @cblasgemm3mobjs = (@cblasgemm3mobjs, @sblasgemm3mobjsc); + @cblasgemm3mobjs = (@cblasgemm3mobjs, @cblasgemm3mobjsc); @lapackobjs = (@lapackobjs, @lapackobjsc); - @lapack2objs = (@lapack2objs, @lapack2objsc, @lapac2objszc); + @lapackobjs2 = (@lapackobjs2, @lapackobjs2c, @lapackobjs2zc); + if ($ARGV[13] == 0) { + @lapackobjs2 = (@lapackobjs2, @lapackobjs2sc); + } + @lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsc); + @lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objsc); @lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c); @lapackeobjs = (@lapackeobjs, @lapackeobjsc); - @lapackobjs2 = (@lapackobjs2, @lapackobjs2sc, @lapackobjs2c); } if ($ARGV[16] == 1) { @blasobjs = (@blasobjs, @blasobjsz); @cblasobjs = (@cblasobjs, @cblasobjsz); @gemm3mobjs = (@gemm3mobjs, @gemm3mobjsz); - @cblasgemm3mobjs = (@cblasgemm3mobjs, @sblasgemm3mobjsz); + @cblasgemm3mobjs = (@cblasgemm3mobjs, @cblasgemm3mobjsz); @lapackobjs = (@lapackobjs, @lapackobjsz); - @lapack2objs = (@lapack2objs, @lapack2objsz, @lapack2objszc); + @lapackobjs2 = (@lapackobjs2, @lapackobjs2z); + if ($ARGV[15] == 0) { + @lapackobjs2 = (@lapackobjs2, @lapackobjs2zc); + } + if ($ARGV[14] == 0) { + @lapackobjs2 = (@lapackobjs2, @lapackobjs2dz); + } + @lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsz); + @lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objsz); @lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z); @lapackeobjs = (@lapackeobjs, @lapackeobjsz); - @lapackobjs2 = (@lapackobjs2, @lapackobjs2dz, @lapackobjs2z); } if ($ARGV[8] == 1) { 
#ONLY_CBLAS=1