From 292a0aed66dd049825af65b6dd75a26cfb423064 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 24 Mar 2021 06:55:14 +0100 Subject: [PATCH 01/10] Fix xcode12 build and add OSX/OpenMP --- .travis.yml | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index bde0e202d..47064672a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -224,12 +224,21 @@ matrix: before_script: - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" - brew update - - brew install gcc@10 script: - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE env: - - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10" - + - BTYPE="TARGET=HASWELL USE_OPENMP=1 BINARY=64 INTERFACE64=1 FC=gfortran-10" + + - <<: *test-macos + osx_image: xcode12 + before_script: + - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" + - brew update + script: + - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE + env: + - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10" + # - <<: *test-macos # osx_image: xcode10 # env: From 70b89a6205d3c4568888c46559d88c642dd34bec Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 24 Mar 2021 07:50:35 +0100 Subject: [PATCH 02/10] Add OSX build to Azure --- azure-pipelines.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 639cb3558..49e53cbda 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -68,4 +68,10 @@ jobs: dir openblas_utest.exe - +- job: OSX_OpenMP + pool: + vmImage: 'macOS-10.15' + steps: + - script: | + make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 + From dbb33f412f7687d047153a9c2dd6bb0a7d2c11de Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 24 Mar 2021 08:30:48 +0100 Subject: [PATCH 03/10] Update azure-pipelines.yml --- azure-pipelines.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 49e53cbda..5040ae697 100644 --- a/azure-pipelines.yml 
+++ b/azure-pipelines.yml @@ -73,5 +73,6 @@ jobs: vmImage: 'macOS-10.15' steps: - script: | - make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 + make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 CC=clang + From e6664ec2c9cbc584faf3f4fe15cfe706767812d2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 24 Mar 2021 08:41:48 +0100 Subject: [PATCH 04/10] Update azure-pipelines.yml --- azure-pipelines.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5040ae697..2933fa358 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -73,6 +73,9 @@ jobs: vmImage: 'macOS-10.15' steps: - script: | - make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 CC=clang + brew update + brew install gcc@10 + make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 CC=gcc + From 9dc0bfd617f94d5cb54ab52c428843999e8ea98e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 24 Mar 2021 08:54:30 +0100 Subject: [PATCH 05/10] Update azure-pipelines.yml --- azure-pipelines.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 2933fa358..cd3f7943f 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -74,8 +74,7 @@ jobs: steps: - script: | brew update - brew install gcc@10 - make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 CC=gcc + make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 CC=gcc-10 From e69b0b177101cd883768820fe639a4fb14466029 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 24 Mar 2021 10:34:24 +0100 Subject: [PATCH 06/10] Update azure-pipelines.yml --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index cd3f7943f..fdf184b22 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -74,7 +74,7 @@ jobs: steps: - script: | brew update - make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 CC=gcc-10 + make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 CC=gcc-10 FC=gfortran-10 From 
8fd694c18fe539c3dc2d5ff4965afed70ade4123 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 24 Mar 2021 10:36:29 +0100 Subject: [PATCH 07/10] Update .travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 47064672a..2a221e3bd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -227,7 +227,7 @@ matrix: script: - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE env: - - BTYPE="TARGET=HASWELL USE_OPENMP=1 BINARY=64 INTERFACE64=1 FC=gfortran-10" + - BTYPE="TARGET=HASWELL USE_OPENMP=1 BINARY=64 INTERFACE64=1 CC=gcc-10 FC=gfortran-10" - <<: *test-macos osx_image: xcode12 From d57c681a6df3b40cc17747338a2b0f657cfc05fb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 26 Mar 2021 22:29:29 +0100 Subject: [PATCH 08/10] Fix compilation on older OSX versions --- benchmark/bench.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmark/bench.h b/benchmark/bench.h index 83de8ab2b..c03d72bef 100644 --- a/benchmark/bench.h +++ b/benchmark/bench.h @@ -3,6 +3,8 @@ #include <time.h> #ifdef __CYGWIN32__ #include <sys/time.h> +#elif defined(__APPLE__) +#include <mach/mach_time.h> #endif #include "common.h" From d2bda3b56a06a30623a840408ee8874d54d1058c Mon Sep 17 00:00:00 2001 From: CodesWithWolves Date: Wed, 31 Mar 2021 15:38:07 -0400 Subject: [PATCH 09/10] Remove Unnecessary/Erroneous Reads In sgemm_tcopy_16.S COPY1x8 Macro There appears to have been some code leak when copying from the COPY2x8 macro above where we're reading 8 bytes into d4-d7 directly after reading 4 bytes into s4-s7. These 32 bytes in d4-7 are unused and can possibly overrun the boundary of allocated memory -- Valgrind detected this which is what dragged my attention to it for a 128,1 copy. Additionally, there is no need to update the addresses stored in A0-A7 as the only possible paths after running this macro will overwrite A0-7 if looping to the next 8 rows, or overwrite A0-3 if moving to 4 rows -- in which case A4-7 are unused. 
--- kernel/arm64/sgemm_tcopy_16.S | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/kernel/arm64/sgemm_tcopy_16.S b/kernel/arm64/sgemm_tcopy_16.S index 12b80bdca..46198b3a2 100644 --- a/kernel/arm64/sgemm_tcopy_16.S +++ b/kernel/arm64/sgemm_tcopy_16.S @@ -270,11 +270,6 @@ All rights reserved. ldr s1, [A02] ldr s2, [A03] ldr s3, [A04] - - add A01, A01, #4 - add A02, A02, #4 - add A03, A03, #4 - add A04, A04, #4 stp s0, s1, [B04] add B04, B04, #8 @@ -285,11 +280,6 @@ All rights reserved. ldr s5, [A06] ldr s6, [A07] ldr s7, [A08] - - ldr d4, [A05], #8 - ldr d5, [A06], #8 - ldr d6, [A07], #8 - ldr d7, [A08], #8 stp s4, s5, [B04] add B04, B04, #8 From 2dbcddd83d45d32191d8e409ac3eca5672128bca Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Wed, 31 Mar 2021 21:32:42 -0500 Subject: [PATCH 10/10] POWER10: Adding check for little endian This patch makes sure that recent POWER10 patches are used only for little endian. --- kernel/power/cdot.c | 4 ++-- kernel/power/cswap.c | 4 +++- kernel/power/dasum.c | 6 ++++-- kernel/power/drot.c | 6 ++++-- kernel/power/dscal.c | 8 +++++--- kernel/power/dswap.c | 6 ++++-- kernel/power/sasum.c | 6 ++++-- kernel/power/srot.c | 6 ++++-- kernel/power/sscal.c | 8 +++++--- kernel/power/sswap.c | 6 ++++-- kernel/power/zscal.c | 6 +++++- kernel/power/zswap.c | 4 +++- 12 files changed, 47 insertions(+), 23 deletions(-) diff --git a/kernel/power/cdot.c b/kernel/power/cdot.c index c53fe0c02..b9e2d2ce5 100644 --- a/kernel/power/cdot.c +++ b/kernel/power/cdot.c @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#else #include "common.h" -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "cdot_microk_power10.c" #else #ifndef HAVE_KERNEL_8 @@ -120,7 +120,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA if ((inc_x == 1) && (inc_y == 1)) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) BLASLONG n1 = n & -16; #else BLASLONG n1 = n & -8; diff --git a/kernel/power/cswap.c b/kernel/power/cswap.c index 4d9b9ccd6..c2fde1c44 100644 --- a/kernel/power/cswap.c +++ b/kernel/power/cswap.c @@ -39,8 +39,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "cswap_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "cswap_microk_power10.c" +#elif defined(POWER10) +#include "cswap_microk_power8.c" #endif #endif diff --git a/kernel/power/dasum.c b/kernel/power/dasum.c index 0cdec3292..7507621cf 100644 --- a/kernel/power/dasum.c +++ b/kernel/power/dasum.c @@ -49,8 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "dasum_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "dasum_microk_power10.c" +#elif defined(POWER10) +#include "dasum_microk_power8.c" #endif #endif @@ -112,7 +114,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) if ( inc_x == 1 ) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 16 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3; diff --git a/kernel/power/drot.c b/kernel/power/drot.c index 94d9d95a3..3229878e4 100644 --- a/kernel/power/drot.c +++ b/kernel/power/drot.c @@ -42,8 +42,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "drot_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "drot_microk_power10.c" +#elif defined(POWER10) +#include "drot_microk_power8.c" #endif #endif @@ -117,7 +119,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT if ( (inc_x == 1) && (inc_y == 1) ) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 16 ) { BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3; diff --git a/kernel/power/dscal.c b/kernel/power/dscal.c index 96c4e51bc..32c39a8f4 100644 --- a/kernel/power/dscal.c +++ b/kernel/power/dscal.c @@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "dscal_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "dscal_microk_power10.c" +#elif defined(POWER10) +#include "dscal_microk_power8.c" #endif #endif @@ -102,7 +104,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS if ( da == 0.0 ) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 16 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3; @@ -136,7 +138,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS else { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 16 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3; diff --git a/kernel/power/dswap.c b/kernel/power/dswap.c index 9e6229c6a..12476965b 100644 --- a/kernel/power/dswap.c +++ b/kernel/power/dswap.c @@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "dswap_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "swap_microk_power10.c" +#elif defined(POWER10) +#include "dswap_microk_power8.c" #endif #endif @@ -117,7 +119,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, if ( (inc_x == 1) && (inc_y == 1 )) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 32 ) { BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3; diff --git a/kernel/power/sasum.c b/kernel/power/sasum.c index af692a7fa..991d27508 100644 --- a/kernel/power/sasum.c +++ b/kernel/power/sasum.c @@ -49,8 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "sasum_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "sasum_microk_power10.c" +#elif defined(POWER10) +#include "sasum_microk_power8.c" #endif #endif @@ -112,7 +114,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) if ( inc_x == 1 ) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 32 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7; diff --git a/kernel/power/srot.c b/kernel/power/srot.c index 3e4f93e2a..5a0d4b12e 100644 --- a/kernel/power/srot.c +++ b/kernel/power/srot.c @@ -42,8 +42,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "srot_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "srot_microk_power10.c" +#elif defined(POWER10) +#include "srot_microk_power8.c" #endif #endif @@ -117,7 +119,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT if ( (inc_x == 1) && (inc_y == 1) ) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 16 ) { BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7; diff --git a/kernel/power/sscal.c b/kernel/power/sscal.c index 65572a8c1..9ae9ccab8 100644 --- a/kernel/power/sscal.c +++ b/kernel/power/sscal.c @@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "sscal_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "sscal_microk_power10.c" +#elif defined(POWER10) +#include "sscal_microk_power8.c" #endif #endif @@ -104,7 +106,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS if ( da == 0.0 ) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 32 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7; @@ -138,7 +140,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS else { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 32 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7; diff --git a/kernel/power/sswap.c b/kernel/power/sswap.c index dd249fd36..955ed02f0 100644 --- a/kernel/power/sswap.c +++ b/kernel/power/sswap.c @@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "sswap_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "swap_microk_power10.c" +#elif defined(POWER10) +#include "sswap_microk_power8.c" #endif #endif @@ -117,7 +119,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, if ( (inc_x == 1) && (inc_y == 1 )) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 64 ) { BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7; diff --git a/kernel/power/zscal.c b/kernel/power/zscal.c index 0068138e8..59ddc149f 100644 --- a/kernel/power/zscal.c +++ b/kernel/power/zscal.c @@ -43,12 +43,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#if defined(DOUBLE) #include "zscal_microk_power8.c" #endif -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #if defined(DOUBLE) #include "zscal_microk_power10.c" #else #include "cscal_microk_power10.c" #endif +#elif defined(POWER10) +#if defined(DOUBLE) +#include "zscal_microk_power8.c" +#endif #endif #endif diff --git a/kernel/power/zswap.c b/kernel/power/zswap.c index 6cd3d9664..908802b71 100644 --- a/kernel/power/zswap.c +++ b/kernel/power/zswap.c @@ -39,8 +39,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "zswap_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "cswap_microk_power10.c" +#elif defined(POWER10) +#include "zswap_microk_power8.c" #endif #endif