Work around Clang failing to disambiguate SVE intrinsics and add an AppleClang cross-build for macOS/arm64 DYNAMIC_ARCH to Azure CI (#4140)
* Add an AppleClang cross-build for macOS/arm64 DYNAMIC_ARCH
* Add casts to disambiguate svwhilelt for Clang
This commit is contained in:
parent
affeef0b9c
commit
3d31191b0f
|
@ -271,6 +271,19 @@ jobs:
|
|||
- script: |
|
||||
make TARGET=ARMV7 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
|
||||
|
||||
# CI job: runs a DYNAMIC_ARCH build with TARGET=ARMV8 on a macOS-11 agent,
# compiling for arm64 with the clang shipped in Xcode 12.5.1.
- job: OSX_xbuild_DYNAMIC_ARM64
|
||||
pool:
|
||||
vmImage: 'macOS-11'
|
||||
# CC selects the Xcode 12.5.1 toolchain clang; CFLAGS points it at the
# MacOSX11.3 SDK and requests arm64 output (-arch arm64).
variables:
|
||||
CC: /Applications/Xcode_12.5.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||
CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX11.3.sdk -arch arm64
|
||||
# The script first prints diagnostics (the SDK directory listing, the CPUs
# clang supports for arm64, and a clang version), then runs make.
# NOTE(review): the --version probe calls the Xcode 11.7 clang, while CC and
# the other probes use Xcode 12.5.1 - confirm whether this is intentional.
steps:
|
||||
- script: |
|
||||
ls /Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs
|
||||
/Applications/Xcode_12.5.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -arch arm64 --print-supported-cpus
|
||||
/Applications/Xcode_11.7.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang --version
|
||||
make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
|
||||
|
||||
- job: ALPINE_MUSL
|
||||
pool:
|
||||
vmImage: 'ubuntu-latest'
|
||||
|
|
|
@ -50,8 +50,8 @@ static FLOAT dot_kernel_sve(BLASLONG n, FLOAT *x, FLOAT *y) {
|
|||
BLASLONG sve_width = SVE_WIDTH;
|
||||
|
||||
for (BLASLONG i = 0; i < n; i += sve_width * 2) {
|
||||
svbool_t pg_a = SVE_WHILELT(i, n);
|
||||
svbool_t pg_b = SVE_WHILELT(i + sve_width, n);
|
||||
svbool_t pg_a = SVE_WHILELT((uint64_t)i, (uint64_t)n);
|
||||
svbool_t pg_b = SVE_WHILELT((uint64_t)(i + sve_width), (uint64_t)n);
|
||||
|
||||
SVE_TYPE x_vec_a = svld1(pg_a, &x[i]);
|
||||
SVE_TYPE y_vec_a = svld1(pg_a, &y[i]);
|
||||
|
|
|
@ -107,7 +107,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b) {
|
|||
BLASLONG remaining_n = n - single_vectors_n;
|
||||
if (remaining_n) {
|
||||
a_offset_inner = a_offset;
|
||||
svbool_t pg = SV_WHILE(0L, remaining_n);
|
||||
svbool_t pg = SV_WHILE((uint64_t)0L, (uint64_t)remaining_n);
|
||||
uint64_t active = remaining_n;
|
||||
uint64_t i_cnt = m >> 2;
|
||||
while (i_cnt--) {
|
||||
|
|
|
@ -100,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
|||
BLASLONG remaining_n = n - single_vectors_n;
|
||||
if (remaining_n) {
|
||||
a_offset_inner = a_offset;
|
||||
svbool_t pg = SV_WHILE(0L, remaining_n);
|
||||
svbool_t pg = SV_WHILE((uint64_t)0L, (uint64_t)remaining_n);
|
||||
uint64_t active = remaining_n;
|
||||
uint64_t i_cnt = m >> 2;
|
||||
while (i_cnt--) {
|
||||
|
|
|
@ -52,11 +52,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
|||
FLOAT *ao;
|
||||
#ifdef DOUBLE
|
||||
svint64_t index = svindex_s64(0LL, lda);
|
||||
svbool_t pn = svwhilelt_b64(js, n);
|
||||
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||
#else
|
||||
svint32_t index = svindex_s32(0, lda);
|
||||
svbool_t pn = svwhilelt_b32(js, n);
|
||||
svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||
#endif
|
||||
do
|
||||
|
@ -123,11 +123,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
|||
posY += n_active;
|
||||
js += n_active;
|
||||
#ifdef DOUBLE
|
||||
pn = svwhilelt_b64(js, n);
|
||||
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||
} while (svptest_any(svptrue_b64(), pn));
|
||||
#else
|
||||
pn = svwhilelt_b32(js, n);
|
||||
pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||
} while (svptest_any(svptrue_b32(), pn));
|
||||
#endif
|
||||
|
|
|
@ -51,10 +51,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
|||
FLOAT *ao;
|
||||
js = 0;
|
||||
#ifdef DOUBLE
|
||||
svbool_t pn = svwhilelt_b64(js, n);
|
||||
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||
#else
|
||||
svbool_t pn = svwhilelt_b32(js, n);
|
||||
svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||
#endif
|
||||
do
|
||||
|
@ -122,11 +122,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
|||
posY += n_active;
|
||||
js += n_active;
|
||||
#ifdef DOUBLE
|
||||
pn = svwhilelt_b64(js, n);
|
||||
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||
} while (svptest_any(svptrue_b64(), pn));
|
||||
#else
|
||||
pn = svwhilelt_b32(js, n);
|
||||
pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||
} while (svptest_any(svptrue_b32(), pn));
|
||||
#endif
|
||||
|
|
|
@ -52,11 +52,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
|||
FLOAT *ao;
|
||||
#ifdef DOUBLE
|
||||
svint64_t index = svindex_s64(0LL, lda);
|
||||
svbool_t pn = svwhilelt_b64(js, n);
|
||||
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||
#else
|
||||
svint32_t index = svindex_s32(0, lda);
|
||||
svbool_t pn = svwhilelt_b32(js, n);
|
||||
svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||
#endif
|
||||
do
|
||||
|
@ -123,11 +123,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
|||
posY += n_active;
|
||||
js += n_active;
|
||||
#ifdef DOUBLE
|
||||
pn = svwhilelt_b64(js, n);
|
||||
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||
} while (svptest_any(svptrue_b64(), pn));
|
||||
#else
|
||||
pn = svwhilelt_b32(js, n);
|
||||
pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||
} while (svptest_any(svptrue_b32(), pn));
|
||||
#endif
|
||||
|
|
|
@ -51,10 +51,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
|||
FLOAT *ao;
|
||||
js = 0;
|
||||
#ifdef DOUBLE
|
||||
svbool_t pn = svwhilelt_b64(js, n);
|
||||
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||
#else
|
||||
svbool_t pn = svwhilelt_b32(js, n);
|
||||
svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||
#endif
|
||||
do
|
||||
|
@ -121,11 +121,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
|||
posY += n_active;
|
||||
js += n_active;
|
||||
#ifdef DOUBLE
|
||||
pn = svwhilelt_b64(js, n);
|
||||
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||
} while (svptest_any(svptrue_b64(), pn));
|
||||
#else
|
||||
pn = svwhilelt_b32(js, n);
|
||||
pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||
} while (svptest_any(svptrue_b32(), pn));
|
||||
#endif
|
||||
|
|
|
@ -56,13 +56,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
|||
#ifdef DOUBLE
|
||||
int64_t js = 0;
|
||||
svint64_t index = svindex_s64(0LL, lda);
|
||||
svbool_t pn = svwhilelt_b64(js, n);
|
||||
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||
#else
|
||||
int32_t N = n;
|
||||
int32_t js = 0;
|
||||
svint32_t index = svindex_s32(0, lda);
|
||||
svbool_t pn = svwhilelt_b32(js, N);
|
||||
svbool_t pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||
#endif
|
||||
do {
|
||||
|
@ -106,11 +106,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
|||
|
||||
js += n_active;
|
||||
#ifdef DOUBLE
|
||||
pn = svwhilelt_b64(js, n);
|
||||
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||
} while (svptest_any(svptrue_b64(), pn));
|
||||
#else
|
||||
pn = svwhilelt_b32(js, N);
|
||||
pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||
} while (svptest_any(svptrue_b32(), pn));
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue