From ff65952e46b84a4a1a969d1cff7e90c3fb15ae43 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Oct 2020 00:55:41 +0200 Subject: [PATCH 1/5] Move HAVE_P10_SUPPORT to the build system to be able to include a binutils version check --- driver/others/dynamic_power.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/driver/others/dynamic_power.c b/driver/others/dynamic_power.c index ca1d42408..85fc5b3ba 100644 --- a/driver/others/dynamic_power.c +++ b/driver/others/dynamic_power.c @@ -6,10 +6,10 @@ extern gotoblas_t gotoblas_POWER8; #if (!defined __GNUC__) || ( __GNUC__ >= 6) extern gotoblas_t gotoblas_POWER9; #endif -#if (!defined __GNUC__) || ( __GNUC__ >= 11) \ - || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) -#define HAVE_P10_SUPPORT 1 -#endif +//#if (!defined __GNUC__) || ( __GNUC__ >= 11) \ +// || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) +//#define HAVE_P10_SUPPORT 1 +//#endif #ifdef HAVE_P10_SUPPORT extern gotoblas_t gotoblas_POWER10; #endif From bb8c3f68611fadff9a99b7cfdebaf250ccbaa129 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Oct 2020 01:04:20 +0200 Subject: [PATCH 2/5] Add ld/binutils version check for POWER10 support --- Makefile.system | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Makefile.system b/Makefile.system index 461f7370b..7f0c26796 100644 --- a/Makefile.system +++ b/Makefile.system @@ -641,6 +641,7 @@ DYNAMIC_CORE += POWER8 ifneq ($(C_COMPILER), GCC) DYNAMIC_CORE += POWER9 DYNAMIC_CORE += POWER10 +CCOMMON_OPT += -DHAVE_P10_SUPPORT endif ifeq ($(C_COMPILER), GCC) ifeq ($(GCCVERSIONGT5), 1) @@ -648,11 +649,14 @@ DYNAMIC_CORE += POWER9 else $(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.) endif -ifeq ($(GCCVERSIONGTEQ11), 1) +LDVERSIONGTEQ35 := $(shell expr ld --version | head -1 | cut -f2 -d "." | cut -f1 -d "-" >= 35) +ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11) DYNAMIC_CORE += POWER10 +CCOMMON_OPT += -DHAVE_P10_SUPPORT else ifeq ($(GCCVERSIONGTEQ10), 1) -ifeq ($(GCCMINORVERSIONGTEQ2), 1) +ifeq ($(GCCMINORVERSIONGTEQ2)$(LDVERSIONGTEQ35), 11) DYNAMIC_CORE += POWER10 +CCOMMON_OPT += -DHAVE_P10_SUPPORT endif else $(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.) From eddc65c7b751d280d5cc16f4121eeb923920c8c1 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Oct 2020 01:09:49 +0200 Subject: [PATCH 3/5] Add POWER10 support flag (unconditionally for now) --- cmake/arch.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/arch.cmake b/cmake/arch.cmake index 99e685d04..5457bfb07 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -49,6 +49,7 @@ if (DYNAMIC_ARCH) if (POWER) set(DYNAMIC_CORE POWER6 POWER8 POWER9 POWER10) + set(CCOMMON_OPT "${CCOMMON_OPT} -DHAVE_P10_SUPPORT") endif () if (X86) From b073d759d056cef826e3906ef00a42068df46091 Mon Sep 17 00:00:00 2001 From: Bart Oldeman Date: Tue, 20 Oct 2020 02:16:47 +0000 Subject: [PATCH 4/5] x86_64: clobber all xmm registers after vzeroupper As observed using GCC 10 using -march=native -ftree-vectorize on Knights Landing, it is now smart enough to find clobbers inside non-inlined static functions. In particular, sgemv counted on a kernel to preserve the whole %ymm2 register (since it was not in the clobber list), but the top part was destroyed by vzeroupper. This caused many tests to fail. This patch makes sure all xmm (and ymm/zmm by extension) registers are listed as clobbered to avoid this happening, as most kernels already did correctly in fact. --- kernel/x86_64/caxpy_microk_bulldozer-2.c | 5 +++-- kernel/x86_64/caxpy_microk_haswell-2.c | 2 +- kernel/x86_64/caxpy_microk_sandy-2.c | 2 +- kernel/x86_64/caxpy_microk_steamroller-2.c | 5 +++-- kernel/x86_64/daxpy_microk_haswell-2.c | 5 +++-- kernel/x86_64/ddot_microk_haswell-2.c | 5 +++-- kernel/x86_64/ddot_microk_piledriver-2.c | 2 ++ kernel/x86_64/ddot_microk_sandy-2.c | 5 +++-- kernel/x86_64/ddot_microk_steamroller-2.c | 1 + kernel/x86_64/dgemv_n_microk_haswell-4.c | 14 ++++++-------- kernel/x86_64/dgemv_n_microk_piledriver-4.c | 6 ++++-- kernel/x86_64/dgemv_t_microk_haswell-4.c | 2 ++ kernel/x86_64/saxpy_microk_haswell-2.c | 3 ++- kernel/x86_64/saxpy_microk_piledriver-2.c | 6 ++++-- kernel/x86_64/sdot_microk_haswell-2.c | 5 +++-- kernel/x86_64/sdot_microk_sandy-2.c | 5 +++-- kernel/x86_64/sgemv_n_microk_haswell-4.c | 14 ++++++-------- kernel/x86_64/sgemv_t_microk_haswell-4.c | 2 ++ kernel/x86_64/zaxpy_microk_bulldozer-2.c | 5 +++-- kernel/x86_64/zaxpy_microk_haswell-2.c | 2 +- kernel/x86_64/zaxpy_microk_sandy-2.c | 6 ++++-- kernel/x86_64/zaxpy_microk_steamroller-2.c | 5 +++-- 22 files changed, 63 insertions(+), 44 deletions(-) diff --git a/kernel/x86_64/caxpy_microk_bulldozer-2.c b/kernel/x86_64/caxpy_microk_bulldozer-2.c index ca2209340..a32558dc9 100644 --- a/kernel/x86_64/caxpy_microk_bulldozer-2.c +++ b/kernel/x86_64/caxpy_microk_bulldozer-2.c @@ -122,7 +122,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (alpha), // 4 "r" (mvec) // 5 : "cc", - "%xmm0", "%xmm1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", @@ -189,9 +189,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (alpha), // 4 "r" (mvec) // 5 : "cc", - "%xmm0", "%xmm1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/caxpy_microk_haswell-2.c b/kernel/x86_64/caxpy_microk_haswell-2.c index b605ea34c..129ce7a49 100644 --- a/kernel/x86_64/caxpy_microk_haswell-2.c +++ b/kernel/x86_64/caxpy_microk_haswell-2.c @@ -120,7 +120,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (alpha), // 4 "r" (mvec) // 5 : "cc", - "%xmm0", "%xmm1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", diff --git a/kernel/x86_64/caxpy_microk_sandy-2.c b/kernel/x86_64/caxpy_microk_sandy-2.c index 72d37afed..564dfbd0f 100644 --- a/kernel/x86_64/caxpy_microk_sandy-2.c +++ b/kernel/x86_64/caxpy_microk_sandy-2.c @@ -104,7 +104,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (alpha), // 4 "r" (mvec) // 5 : "cc", - "%xmm0", "%xmm1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", diff --git a/kernel/x86_64/caxpy_microk_steamroller-2.c b/kernel/x86_64/caxpy_microk_steamroller-2.c index 7ca7af070..cc5c5de76 100644 --- a/kernel/x86_64/caxpy_microk_steamroller-2.c +++ b/kernel/x86_64/caxpy_microk_steamroller-2.c @@ -122,7 +122,7 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (alpha), // 4 "r" (mvec) // 5 : "cc", - "%xmm0", "%xmm1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", @@ -189,9 +189,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (alpha), // 4 "r" (mvec) // 5 : "cc", - "%xmm0", "%xmm1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/daxpy_microk_haswell-2.c b/kernel/x86_64/daxpy_microk_haswell-2.c index f3682e6d7..ecc0ecbd3 100644 --- a/kernel/x86_64/daxpy_microk_haswell-2.c +++ b/kernel/x86_64/daxpy_microk_haswell-2.c @@ -67,8 +67,9 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (y), // 3 "r" (alpha) // 4 : "cc", - "%xmm0", - "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/ddot_microk_haswell-2.c b/kernel/x86_64/ddot_microk_haswell-2.c index dbb5487f7..faac72870 100644 --- a/kernel/x86_64/ddot_microk_haswell-2.c +++ b/kernel/x86_64/ddot_microk_haswell-2.c @@ -84,8 +84,9 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "r" (y), // 3 "r" (dot) // 4 : "cc", - "%xmm4", "%xmm5", - "%xmm6", "%xmm7", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/ddot_microk_piledriver-2.c b/kernel/x86_64/ddot_microk_piledriver-2.c index cc4bcd90a..0320a2e36 100644 --- a/kernel/x86_64/ddot_microk_piledriver-2.c +++ b/kernel/x86_64/ddot_microk_piledriver-2.c @@ -91,6 +91,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); @@ -155,6 +156,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/ddot_microk_sandy-2.c b/kernel/x86_64/ddot_microk_sandy-2.c index 84493ec27..35ba86a7d 100644 --- a/kernel/x86_64/ddot_microk_sandy-2.c +++ b/kernel/x86_64/ddot_microk_sandy-2.c @@ -89,8 +89,9 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "r" (y), // 3 "r" (dot) // 4 : "cc", - "%xmm4", "%xmm5", - "%xmm6", "%xmm7", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/ddot_microk_steamroller-2.c b/kernel/x86_64/ddot_microk_steamroller-2.c index 27d5244ce..94c012f0d 100644 --- a/kernel/x86_64/ddot_microk_steamroller-2.c +++ b/kernel/x86_64/ddot_microk_steamroller-2.c @@ -88,6 +88,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/dgemv_n_microk_haswell-4.c b/kernel/x86_64/dgemv_n_microk_haswell-4.c index da0fa2fff..c20c0a030 100644 --- a/kernel/x86_64/dgemv_n_microk_haswell-4.c +++ b/kernel/x86_64/dgemv_n_microk_haswell-4.c @@ -105,9 +105,8 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "r" (alpha) // 8 : "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", - "%xmm4", "%xmm5", - "%xmm6", "%xmm7", - "%xmm8", "%xmm9", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); @@ -182,11 +181,10 @@ static void dgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "r" (ap[1]), // 5 "r" (alpha) // 6 : "cc", - "%xmm0", "%xmm1", - "%xmm4", "%xmm5", - "%xmm6", - "%xmm8", - "%xmm12", "%xmm13", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); } diff --git a/kernel/x86_64/dgemv_n_microk_piledriver-4.c b/kernel/x86_64/dgemv_n_microk_piledriver-4.c index 466931b82..57fa426ba 100644 --- a/kernel/x86_64/dgemv_n_microk_piledriver-4.c +++ b/kernel/x86_64/dgemv_n_microk_piledriver-4.c @@ -140,7 +140,7 @@ static void dgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", - "%xmm8", "%xmm9", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); @@ -235,9 +235,11 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "r" (ap[3]), // 7 "r" (alpha) // 8 : "cc", + "%xmm0", "%xmm1", + "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", - "%xmm8", "%xmm9", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/dgemv_t_microk_haswell-4.c b/kernel/x86_64/dgemv_t_microk_haswell-4.c index 958fd3e0a..b398307d3 100644 --- a/kernel/x86_64/dgemv_t_microk_haswell-4.c +++ b/kernel/x86_64/dgemv_t_microk_haswell-4.c @@ -117,7 +117,9 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "r" (ap[2]), // 6 "r" (ap[3]) // 7 : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/saxpy_microk_haswell-2.c b/kernel/x86_64/saxpy_microk_haswell-2.c index 7099ba4c6..8cc697f05 100644 --- a/kernel/x86_64/saxpy_microk_haswell-2.c +++ b/kernel/x86_64/saxpy_microk_haswell-2.c @@ -67,7 +67,8 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (y), // 3 "r" (alpha) // 4 : "cc", - "%xmm0", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" diff --git a/kernel/x86_64/saxpy_microk_piledriver-2.c b/kernel/x86_64/saxpy_microk_piledriver-2.c index 5feea7f24..ebbcc0045 100644 --- a/kernel/x86_64/saxpy_microk_piledriver-2.c +++ b/kernel/x86_64/saxpy_microk_piledriver-2.c @@ -86,7 +86,8 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (y), // 3 "r" (alpha) // 4 : "cc", - "%xmm0", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" @@ -147,7 +148,8 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (y), // 3 "r" (alpha) // 4 : "cc", - "%xmm0", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" diff --git a/kernel/x86_64/sdot_microk_haswell-2.c b/kernel/x86_64/sdot_microk_haswell-2.c index 91dc928d3..322f4b28c 100644 --- a/kernel/x86_64/sdot_microk_haswell-2.c +++ b/kernel/x86_64/sdot_microk_haswell-2.c @@ -87,8 +87,9 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "r" (y), // 3 "r" (dot) // 4 : "cc", - "%xmm4", "%xmm5", - "%xmm6", "%xmm7", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/sdot_microk_sandy-2.c b/kernel/x86_64/sdot_microk_sandy-2.c index ae25d5a50..ce09b06cf 100644 --- a/kernel/x86_64/sdot_microk_sandy-2.c +++ b/kernel/x86_64/sdot_microk_sandy-2.c @@ -90,8 +90,9 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot) "r" (y), // 3 "r" (dot) // 4 : "cc", - "%xmm4", "%xmm5", - "%xmm6", "%xmm7", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/sgemv_n_microk_haswell-4.c b/kernel/x86_64/sgemv_n_microk_haswell-4.c index 93e1e26e8..556dcfde5 100644 --- a/kernel/x86_64/sgemv_n_microk_haswell-4.c +++ b/kernel/x86_64/sgemv_n_microk_haswell-4.c @@ -164,11 +164,9 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO "r" (ap[3]), // 8 "r" (alpha) // 9 : "cc", - "%xmm0", "%xmm1", - "%xmm2", "%xmm3", - "%xmm4", "%xmm5", - "%xmm6", "%xmm7", - "%xmm8", "%xmm9", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); @@ -286,9 +284,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "r" (ap[3]), // 7 "r" (alpha) // 8 : "cc", - "%xmm4", "%xmm5", - "%xmm6", "%xmm7", - "%xmm8", "%xmm9", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", + "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/sgemv_t_microk_haswell-4.c b/kernel/x86_64/sgemv_t_microk_haswell-4.c index 8c370b4c0..fcabc0def 100644 --- a/kernel/x86_64/sgemv_t_microk_haswell-4.c +++ b/kernel/x86_64/sgemv_t_microk_haswell-4.c @@ -138,7 +138,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) "r" (ap[2]), // 6 "r" (ap[3]) // 7 : "cc", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", + "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/zaxpy_microk_bulldozer-2.c b/kernel/x86_64/zaxpy_microk_bulldozer-2.c index 15d367971..ccb26134f 100644 --- a/kernel/x86_64/zaxpy_microk_bulldozer-2.c +++ b/kernel/x86_64/zaxpy_microk_bulldozer-2.c @@ -122,7 +122,7 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (alpha), // 4 "r" (mvec) // 5 : "cc", - "%xmm0", "%xmm1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", @@ -189,9 +189,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (alpha), // 4 "r" (mvec) // 5 : "cc", - "%xmm0", "%xmm1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/zaxpy_microk_haswell-2.c b/kernel/x86_64/zaxpy_microk_haswell-2.c index 89d23daf3..8f299ea2d 100644 --- a/kernel/x86_64/zaxpy_microk_haswell-2.c +++ b/kernel/x86_64/zaxpy_microk_haswell-2.c @@ -120,7 +120,7 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (alpha), // 4 "r" (mvec) // 5 : "cc", - "%xmm0", "%xmm1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", diff --git a/kernel/x86_64/zaxpy_microk_sandy-2.c b/kernel/x86_64/zaxpy_microk_sandy-2.c index 17b8b24f7..5246c72e8 100644 --- a/kernel/x86_64/zaxpy_microk_sandy-2.c +++ b/kernel/x86_64/zaxpy_microk_sandy-2.c @@ -108,9 +108,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (alpha), // 4 "r" (mvec) // 5 : "cc", - "%xmm0", "%xmm1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); return; @@ -185,9 +186,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (alpha), // 4 "r" (mvec) // 5 : "cc", - "%xmm0", "%xmm1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); diff --git a/kernel/x86_64/zaxpy_microk_steamroller-2.c b/kernel/x86_64/zaxpy_microk_steamroller-2.c index 907b1ae00..88e3a680b 100644 --- a/kernel/x86_64/zaxpy_microk_steamroller-2.c +++ b/kernel/x86_64/zaxpy_microk_steamroller-2.c @@ -122,7 +122,7 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (alpha), // 4 "r" (mvec) // 5 : "cc", - "%xmm0", "%xmm1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15", @@ -189,9 +189,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) "r" (alpha), // 4 "r" (mvec) // 5 : "cc", - "%xmm0", "%xmm1", + "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", + "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); From 1a0f57c8f0bd4e2a1eb8ae7a996a09468d7f3067 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 20 Oct 2020 08:37:53 +0200 Subject: [PATCH 5/5] Fix missing backquotes --- Makefile.system | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.system b/Makefile.system index 7f0c26796..30d8f4ccf 100644 --- a/Makefile.system +++ b/Makefile.system @@ -649,7 +649,7 @@ DYNAMIC_CORE += POWER9 else $(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.) endif -LDVERSIONGTEQ35 := $(shell expr ld --version | head -1 | cut -f2 -d "." | cut -f1 -d "-" >= 35) +LDVERSIONGTEQ35 := $(shell expr `ld --version | head -1 | cut -f2 -d "." | cut -f1 -d "-"` >= 35) ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11) DYNAMIC_CORE += POWER10 CCOMMON_OPT += -DHAVE_P10_SUPPORT