diff --git a/.travis.yml b/.travis.yml index bde0e202d..2a221e3bd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -224,12 +224,21 @@ matrix: before_script: - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" - brew update - - brew install gcc@10 script: - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE env: - - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10" - + - BTYPE="TARGET=HASWELL USE_OPENMP=1 BINARY=64 INTERFACE64=1 CC=gcc-10 FC=gfortran-10" + + - <<: *test-macos + osx_image: xcode12 + before_script: + - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" + - brew update + script: + - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE + env: + - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10" + # - <<: *test-macos # osx_image: xcode10 # env: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 639cb3558..fdf184b22 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -68,4 +68,13 @@ jobs: dir openblas_utest.exe +- job: OSX_OpenMP + pool: + vmImage: 'macOS-10.15' + steps: + - script: | + brew update + make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 CC=gcc-10 FC=gfortran-10 + + diff --git a/benchmark/bench.h b/benchmark/bench.h index 83de8ab2b..c03d72bef 100644 --- a/benchmark/bench.h +++ b/benchmark/bench.h @@ -3,6 +3,8 @@ #include #ifdef __CYGWIN32__ #include +#elif defined(__APPLE__) +#include #endif #include "common.h" diff --git a/kernel/arm64/sgemm_tcopy_16.S b/kernel/arm64/sgemm_tcopy_16.S index 12b80bdca..46198b3a2 100644 --- a/kernel/arm64/sgemm_tcopy_16.S +++ b/kernel/arm64/sgemm_tcopy_16.S @@ -270,11 +270,6 @@ All rights reserved. ldr s1, [A02] ldr s2, [A03] ldr s3, [A04] - - add A01, A01, #4 - add A02, A02, #4 - add A03, A03, #4 - add A04, A04, #4 stp s0, s1, [B04] add B04, B04, #8 @@ -285,11 +280,6 @@ All rights reserved. ldr s5, [A06] ldr s6, [A07] ldr s7, [A08] - - ldr d4, [A05], #8 - ldr d5, [A06], #8 - ldr d6, [A07], #8 - ldr d7, [A08], #8 stp s4, s5, [B04] add B04, B04, #8 diff --git a/kernel/power/cdot.c b/kernel/power/cdot.c index c53fe0c02..b9e2d2ce5 100644 --- a/kernel/power/cdot.c +++ b/kernel/power/cdot.c @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else #include "common.h" -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "cdot_microk_power10.c" #else #ifndef HAVE_KERNEL_8 @@ -120,7 +120,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA if ((inc_x == 1) && (inc_y == 1)) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) BLASLONG n1 = n & -16; #else BLASLONG n1 = n & -8; diff --git a/kernel/power/cswap.c b/kernel/power/cswap.c index 4d9b9ccd6..c2fde1c44 100644 --- a/kernel/power/cswap.c +++ b/kernel/power/cswap.c @@ -39,8 +39,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "cswap_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "cswap_microk_power10.c" +#elif defined(POWER10) +#include "cswap_microk_power8.c" #endif #endif diff --git a/kernel/power/dasum.c b/kernel/power/dasum.c index 0cdec3292..7507621cf 100644 --- a/kernel/power/dasum.c +++ b/kernel/power/dasum.c @@ -49,8 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "dasum_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "dasum_microk_power10.c" +#elif defined(POWER10) +#include "dasum_microk_power8.c" #endif #endif @@ -112,7 +114,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) if ( inc_x == 1 ) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 16 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3; diff --git a/kernel/power/drot.c b/kernel/power/drot.c index 94d9d95a3..3229878e4 100644 --- a/kernel/power/drot.c +++ b/kernel/power/drot.c @@ -42,8 +42,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "drot_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "drot_microk_power10.c" +#elif defined(POWER10) +#include "drot_microk_power8.c" #endif #endif @@ -117,7 +119,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT if ( (inc_x == 1) && (inc_y == 1) ) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 16 ) { BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3; diff --git a/kernel/power/dscal.c b/kernel/power/dscal.c index 96c4e51bc..32c39a8f4 100644 --- a/kernel/power/dscal.c +++ b/kernel/power/dscal.c @@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "dscal_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "dscal_microk_power10.c" +#elif defined(POWER10) +#include "dscal_microk_power8.c" #endif #endif @@ -102,7 +104,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS if ( da == 0.0 ) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 16 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3; @@ -136,7 +138,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS else { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 16 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3; diff --git a/kernel/power/dswap.c b/kernel/power/dswap.c index 9e6229c6a..12476965b 100644 --- a/kernel/power/dswap.c +++ b/kernel/power/dswap.c @@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "dswap_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "swap_microk_power10.c" +#elif defined(POWER10) +#include "dswap_microk_power8.c" #endif #endif @@ -117,7 +119,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, if ( (inc_x == 1) && (inc_y == 1 )) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 32 ) { BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3; diff --git a/kernel/power/sasum.c b/kernel/power/sasum.c index af692a7fa..991d27508 100644 --- a/kernel/power/sasum.c +++ b/kernel/power/sasum.c @@ -49,8 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "sasum_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "sasum_microk_power10.c" +#elif defined(POWER10) +#include "sasum_microk_power8.c" #endif #endif @@ -112,7 +114,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) if ( inc_x == 1 ) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 32 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7; diff --git a/kernel/power/srot.c b/kernel/power/srot.c index 3e4f93e2a..5a0d4b12e 100644 --- a/kernel/power/srot.c +++ b/kernel/power/srot.c @@ -42,8 +42,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "srot_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "srot_microk_power10.c" +#elif defined(POWER10) +#include "srot_microk_power8.c" #endif #endif @@ -117,7 +119,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT if ( (inc_x == 1) && (inc_y == 1) ) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 16 ) { BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7; diff --git a/kernel/power/sscal.c b/kernel/power/sscal.c index 65572a8c1..9ae9ccab8 100644 --- a/kernel/power/sscal.c +++ b/kernel/power/sscal.c @@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "sscal_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "sscal_microk_power10.c" +#elif defined(POWER10) +#include "sscal_microk_power8.c" #endif #endif @@ -104,7 +106,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS if ( da == 0.0 ) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 32 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7; @@ -138,7 +140,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS else { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 32 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7; diff --git a/kernel/power/sswap.c b/kernel/power/sswap.c index dd249fd36..955ed02f0 100644 --- a/kernel/power/sswap.c +++ b/kernel/power/sswap.c @@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "sswap_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "swap_microk_power10.c" +#elif defined(POWER10) +#include "sswap_microk_power8.c" #endif #endif @@ -117,7 +119,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, if ( (inc_x == 1) && (inc_y == 1 )) { -#if defined(POWER10) +#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) if ( n >= 64 ) { BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7; diff --git a/kernel/power/zscal.c b/kernel/power/zscal.c index 0068138e8..59ddc149f 100644 --- a/kernel/power/zscal.c +++ b/kernel/power/zscal.c @@ -43,12 +43,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(DOUBLE) #include "zscal_microk_power8.c" #endif -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #if defined(DOUBLE) #include "zscal_microk_power10.c" #else #include "cscal_microk_power10.c" #endif +#elif defined(POWER10) +#if defined(DOUBLE) +#include "zscal_microk_power8.c" +#endif #endif #endif diff --git a/kernel/power/zswap.c b/kernel/power/zswap.c index 6cd3d9664..908802b71 100644 --- a/kernel/power/zswap.c +++ b/kernel/power/zswap.c @@ -39,8 +39,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(__VEC__) || defined(__ALTIVEC__) #if defined(POWER8) || defined(POWER9) #include "zswap_microk_power8.c" -#elif defined(POWER10) +#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__) #include "cswap_microk_power10.c" +#elif defined(POWER10) +#include "zswap_microk_power8.c" #endif #endif