Disambiguate whilelt
This commit is contained in:
parent
f971ef55f2
commit
24586bc4ff
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -52,7 +53,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||||
boffset = b;
|
boffset = b;
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
svbool_t pg = svwhilelt_b32(j, n);
|
svbool_t pg = svwhilelt_b32((uint64_t)j, (uint64_t)n);
|
||||||
uint32_t active = svcntp_b32(svptrue_b32(), pg);
|
uint32_t active = svcntp_b32(svptrue_b32(), pg);
|
||||||
do {
|
do {
|
||||||
|
|
||||||
|
@ -69,7 +70,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||||
aoffset += active * lda * 2;
|
aoffset += active * lda * 2;
|
||||||
|
|
||||||
j += svcntw();
|
j += svcntw();
|
||||||
pg = svwhilelt_b32(j, n);
|
pg = svwhilelt_b32((uint64_t)j, (uint64_t)n);
|
||||||
active = svcntp_b32(svptrue_b32(), pg);
|
active = svcntp_b32(svptrue_b32(), pg);
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -50,7 +51,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||||
boffset = b;
|
boffset = b;
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
svbool_t pg = svwhilelt_b32(j, n);
|
svbool_t pg = svwhilelt_b32((uint64_t)j, (uint64_t)n);
|
||||||
uint32_t active = svcntp_b32(svptrue_b32(), pg);
|
uint32_t active = svcntp_b32(svptrue_b32(), pg);
|
||||||
do {
|
do {
|
||||||
|
|
||||||
|
@ -66,7 +67,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||||
aoffset += active * 2;
|
aoffset += active * 2;
|
||||||
|
|
||||||
j += svcntw();
|
j += svcntw();
|
||||||
pg = svwhilelt_b32(j, n);
|
pg = svwhilelt_b32((uint64_t)j, (uint64_t)n);
|
||||||
active = svcntp_b32(svptrue_b32(), pg);
|
active = svcntp_b32(svptrue_b32(), pg);
|
||||||
|
|
||||||
} while (svptest_any(svptrue_b32(), pg));
|
} while (svptest_any(svptrue_b32(), pg));
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -52,7 +53,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
svint64_t one_vec = svdup_s64(1LL);
|
svint64_t one_vec = svdup_s64(1LL);
|
||||||
|
|
||||||
int64_t j = 0;
|
int64_t j = 0;
|
||||||
svbool_t pg = svwhilelt_b64(j, n);
|
svbool_t pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
int64_t active = svcntp_b64(svptrue_b64(), pg);
|
int64_t active = svcntp_b64(svptrue_b64(), pg);
|
||||||
svint64_t index_neg = svindex_s64(0LL, -1LL);
|
svint64_t index_neg = svindex_s64(0LL, -1LL);
|
||||||
svint64_t index = svindex_s64(0LL, 1LL);
|
svint64_t index = svindex_s64(0LL, 1LL);
|
||||||
|
@ -86,7 +87,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posX += sve_size;
|
posX += sve_size;
|
||||||
posX_vec = svdup_s64(posX);
|
posX_vec = svdup_s64(posX);
|
||||||
j += sve_size;
|
j += sve_size;
|
||||||
pg = svwhilelt_b64(j, n);
|
pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
active = svcntp_b64(svptrue_b64(), pg);
|
active = svcntp_b64(svptrue_b64(), pg);
|
||||||
} while (svptest_any(svptrue_b64(), pg));
|
} while (svptest_any(svptrue_b64(), pg));
|
||||||
|
|
||||||
|
@ -99,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
|
|
||||||
int32_t N = n;
|
int32_t N = n;
|
||||||
int32_t j = 0;
|
int32_t j = 0;
|
||||||
svbool_t pg = svwhilelt_b32(j, N);
|
svbool_t pg = svwhilelt_b32((uint32_t)j, (uint32_t)N);
|
||||||
int32_t active = svcntp_b32(svptrue_b32(), pg);
|
int32_t active = svcntp_b32(svptrue_b32(), pg);
|
||||||
svint32_t index_neg = svindex_s32(0, -1);
|
svint32_t index_neg = svindex_s32(0, -1);
|
||||||
svint32_t index = svindex_s32(0, 1);
|
svint32_t index = svindex_s32(0, 1);
|
||||||
|
@ -133,7 +134,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posX += sve_size;
|
posX += sve_size;
|
||||||
posX_vec = svdup_s32(posX);
|
posX_vec = svdup_s32(posX);
|
||||||
j += sve_size;
|
j += sve_size;
|
||||||
pg = svwhilelt_b32(j, N);
|
pg = svwhilelt_b32((uint32_t)j, (uint32_t)N);
|
||||||
active = svcntp_b32(svptrue_b32(), pg);
|
active = svcntp_b32(svptrue_b32(), pg);
|
||||||
} while (svptest_any(svptrue_b32(), pg));
|
} while (svptest_any(svptrue_b32(), pg));
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -52,7 +53,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
svint64_t one_vec = svdup_s64(1LL);
|
svint64_t one_vec = svdup_s64(1LL);
|
||||||
|
|
||||||
int64_t j = 0;
|
int64_t j = 0;
|
||||||
svbool_t pg = svwhilelt_b64(j, n);
|
svbool_t pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
int64_t active = svcntp_b64(svptrue_b64(), pg);
|
int64_t active = svcntp_b64(svptrue_b64(), pg);
|
||||||
svint64_t index_neg = svindex_s64(0LL, -1LL);
|
svint64_t index_neg = svindex_s64(0LL, -1LL);
|
||||||
svint64_t index = svindex_s64(0LL, 1LL);
|
svint64_t index = svindex_s64(0LL, 1LL);
|
||||||
|
@ -86,7 +87,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posX += sve_size;
|
posX += sve_size;
|
||||||
posX_vec = svdup_s64(posX);
|
posX_vec = svdup_s64(posX);
|
||||||
j += sve_size;
|
j += sve_size;
|
||||||
pg = svwhilelt_b64(j, n);
|
pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
active = svcntp_b64(svptrue_b64(), pg);
|
active = svcntp_b64(svptrue_b64(), pg);
|
||||||
} while (svptest_any(svptrue_b64(), pg));
|
} while (svptest_any(svptrue_b64(), pg));
|
||||||
|
|
||||||
|
@ -99,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
|
|
||||||
int32_t N = n;
|
int32_t N = n;
|
||||||
int32_t j = 0;
|
int32_t j = 0;
|
||||||
svbool_t pg = svwhilelt_b32(j, N);
|
svbool_t pg = svwhilelt_b32((uint32_t)j, (uint32_t)N);
|
||||||
int32_t active = svcntp_b32(svptrue_b32(), pg);
|
int32_t active = svcntp_b32(svptrue_b32(), pg);
|
||||||
svint32_t index_neg = svindex_s32(0, -1);
|
svint32_t index_neg = svindex_s32(0, -1);
|
||||||
svint32_t index = svindex_s32(0, 1);
|
svint32_t index = svindex_s32(0, 1);
|
||||||
|
@ -133,7 +134,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posX += sve_size;
|
posX += sve_size;
|
||||||
posX_vec = svdup_s32(posX);
|
posX_vec = svdup_s32(posX);
|
||||||
j += sve_size;
|
j += sve_size;
|
||||||
pg = svwhilelt_b32(j, N);
|
pg = svwhilelt_b32((uint32_t)j, (uint32_t)N);
|
||||||
active = svcntp_b32(svptrue_b32(), pg);
|
active = svcntp_b32(svptrue_b32(), pg);
|
||||||
} while (svptest_any(svptrue_b32(), pg));
|
} while (svptest_any(svptrue_b32(), pg));
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -55,12 +56,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
jj = offset;
|
jj = offset;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
int64_t js = 0;
|
int64_t js = 0;
|
||||||
svbool_t pn = svwhilelt_b64(js, n);
|
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
#else
|
#else
|
||||||
int32_t N = n;
|
int32_t N = n;
|
||||||
int32_t js = 0;
|
int32_t js = 0;
|
||||||
svbool_t pn = svwhilelt_b32(js, N);
|
svbool_t pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
#endif
|
#endif
|
||||||
do {
|
do {
|
||||||
|
@ -104,11 +105,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
js += n_active;
|
js += n_active;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
pn = svwhilelt_b64(js, n);
|
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
} while (svptest_any(svptrue_b64(), pn));
|
} while (svptest_any(svptrue_b64(), pn));
|
||||||
#else
|
#else
|
||||||
pn = svwhilelt_b32(js, N);
|
pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
} while (svptest_any(svptrue_b32(), pn));
|
} while (svptest_any(svptrue_b32(), pn));
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -56,13 +57,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
int64_t js = 0;
|
int64_t js = 0;
|
||||||
svint64_t index = svindex_s64(0LL, lda);
|
svint64_t index = svindex_s64(0LL, lda);
|
||||||
svbool_t pn = svwhilelt_b64(js, n);
|
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
#else
|
#else
|
||||||
int32_t N = n;
|
int32_t N = n;
|
||||||
int32_t js = 0;
|
int32_t js = 0;
|
||||||
svint32_t index = svindex_s32(0, lda);
|
svint32_t index = svindex_s32(0, lda);
|
||||||
svbool_t pn = svwhilelt_b32(js, N);
|
svbool_t pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
#endif
|
#endif
|
||||||
do {
|
do {
|
||||||
|
@ -106,11 +107,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
js += n_active;
|
js += n_active;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
pn = svwhilelt_b64(js, n);
|
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
} while (svptest_any(svptrue_b64(), pn));
|
} while (svptest_any(svptrue_b64(), pn));
|
||||||
#else
|
#else
|
||||||
pn = svwhilelt_b32(js, N);
|
pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
} while (svptest_any(svptrue_b32(), pn));
|
} while (svptest_any(svptrue_b32(), pn));
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -55,12 +56,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
jj = offset;
|
jj = offset;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
int64_t js = 0;
|
int64_t js = 0;
|
||||||
svbool_t pn = svwhilelt_b64(js, n);
|
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
#else
|
#else
|
||||||
int32_t N = n;
|
int32_t N = n;
|
||||||
int32_t js = 0;
|
int32_t js = 0;
|
||||||
svbool_t pn = svwhilelt_b32(js, N);
|
svbool_t pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
#endif
|
#endif
|
||||||
do {
|
do {
|
||||||
|
@ -104,11 +105,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
js += n_active;
|
js += n_active;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
pn = svwhilelt_b64(js, n);
|
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
} while (svptest_any(svptrue_b64(), pn));
|
} while (svptest_any(svptrue_b64(), pn));
|
||||||
#else
|
#else
|
||||||
pn = svwhilelt_b32(js, N);
|
pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
} while (svptest_any(svptrue_b32(), pn));
|
} while (svptest_any(svptrue_b32(), pn));
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -52,7 +53,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||||
boffset = b;
|
boffset = b;
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
svbool_t pg = svwhilelt_b64(j, n);
|
svbool_t pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
uint64_t active = svcntp_b64(svptrue_b64(), pg);
|
uint64_t active = svcntp_b64(svptrue_b64(), pg);
|
||||||
do {
|
do {
|
||||||
|
|
||||||
|
@ -69,7 +70,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||||
aoffset += active * lda * 2;
|
aoffset += active * lda * 2;
|
||||||
|
|
||||||
j += svcntd();
|
j += svcntd();
|
||||||
pg = svwhilelt_b64(j, n);
|
pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
active = svcntp_b64(svptrue_b64(), pg);
|
active = svcntp_b64(svptrue_b64(), pg);
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -50,7 +51,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||||
boffset = b;
|
boffset = b;
|
||||||
|
|
||||||
j = 0;
|
j = 0;
|
||||||
svbool_t pg = svwhilelt_b64(j, n);
|
svbool_t pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
uint64_t active = svcntp_b64(svptrue_b64(), pg);
|
uint64_t active = svcntp_b64(svptrue_b64(), pg);
|
||||||
do {
|
do {
|
||||||
|
|
||||||
|
@ -66,7 +67,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||||
aoffset += active * 2;
|
aoffset += active * 2;
|
||||||
|
|
||||||
j += svcntd();
|
j += svcntd();
|
||||||
pg = svwhilelt_b64(j, n);
|
pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
active = svcntp_b64(svptrue_b64(), pg);
|
active = svcntp_b64(svptrue_b64(), pg);
|
||||||
|
|
||||||
} while (svptest_any(svptrue_b64(), pg));
|
} while (svptest_any(svptrue_b64(), pg));
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -54,7 +55,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
svint64_t one_vec = svdup_s64(1LL);
|
svint64_t one_vec = svdup_s64(1LL);
|
||||||
|
|
||||||
int64_t j = 0;
|
int64_t j = 0;
|
||||||
svbool_t pg = svwhilelt_b64(j, n);
|
svbool_t pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
int64_t active = svcntp_b64(svptrue_b64(), pg);
|
int64_t active = svcntp_b64(svptrue_b64(), pg);
|
||||||
svint64_t index_neg = svindex_s64(0LL, -1LL);
|
svint64_t index_neg = svindex_s64(0LL, -1LL);
|
||||||
svint64_t index = svindex_s64(0LL, 1LL);
|
svint64_t index = svindex_s64(0LL, 1LL);
|
||||||
|
@ -79,7 +80,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
gat_ind = svadd_m(cmp, gat_ind, lda_vec);
|
gat_ind = svadd_m(cmp, gat_ind, lda_vec);
|
||||||
gat_ind = svadd_m(svnot_z(pg, cmp) , gat_ind, 2);
|
gat_ind = svadd_m(svnot_z(pg, cmp) , gat_ind, 2);
|
||||||
if (offset <= 0) {
|
if (offset <= 0) {
|
||||||
svbool_t off_g = svwhilelt_b64(offset, 0LL);
|
/* This runs only when offset <= 0, so the comparison has to stay signed:
   casting a negative offset to uint64_t selects the unsigned overload, where
   the value is never below 0 and no lane would be negated.  Signed casts
   still resolve the overload ambiguity this commit is addressing. */
svbool_t off_g = svwhilelt_b64((int64_t)offset, (int64_t)0LL);
|
||||||
data_vec_imag = svneg_m(data_vec_imag, off_g, data_vec_imag);
|
data_vec_imag = svneg_m(data_vec_imag, off_g, data_vec_imag);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -99,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posX += sve_size;
|
posX += sve_size;
|
||||||
posX_vec = svdup_s64(posX);
|
posX_vec = svdup_s64(posX);
|
||||||
j += sve_size;
|
j += sve_size;
|
||||||
pg = svwhilelt_b64(j, n);
|
pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
active = svcntp_b64(svptrue_b64(), pg);
|
active = svcntp_b64(svptrue_b64(), pg);
|
||||||
} while (svptest_any(svptrue_b64(), pg));
|
} while (svptest_any(svptrue_b64(), pg));
|
||||||
|
|
||||||
|
@ -117,7 +118,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
|
|
||||||
int32_t j = 0;
|
int32_t j = 0;
|
||||||
int32_t N = n;
|
int32_t N = n;
|
||||||
svbool_t pg = svwhilelt_b32(j, N);
|
svbool_t pg = svwhilelt_b32((uint32_t)j, (uint32_t)N);
|
||||||
int32_t active = svcntp_b32(svptrue_b32(), pg);
|
int32_t active = svcntp_b32(svptrue_b32(), pg);
|
||||||
svint32_t index_neg = svindex_s32(0, -1);
|
svint32_t index_neg = svindex_s32(0, -1);
|
||||||
svint32_t index = svindex_s32(0, 1);
|
svint32_t index = svindex_s32(0, 1);
|
||||||
|
@ -142,7 +143,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
gat_ind = svadd_m(cmp, gat_ind, lda_vec);
|
gat_ind = svadd_m(cmp, gat_ind, lda_vec);
|
||||||
gat_ind = svadd_m(svnot_z(pg, cmp) , gat_ind, 2);
|
gat_ind = svadd_m(svnot_z(pg, cmp) , gat_ind, 2);
|
||||||
if (offset <= 0) {
|
if (offset <= 0) {
|
||||||
svbool_t off_g = svwhilelt_b32(offset, 0);
|
/* This runs only when offset <= 0, so the comparison has to stay signed:
   casting a negative offset to uint32_t selects the unsigned overload, where
   the value is never below 0 and no lane would be negated.  Signed casts
   still resolve the overload ambiguity this commit is addressing. */
svbool_t off_g = svwhilelt_b32((int32_t)offset, (int32_t)0);
|
||||||
data_vec_imag = svneg_m(data_vec_imag, off_g, data_vec_imag);
|
data_vec_imag = svneg_m(data_vec_imag, off_g, data_vec_imag);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -162,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posX += sve_size;
|
posX += sve_size;
|
||||||
posX_vec = svdup_s32(posX);
|
posX_vec = svdup_s32(posX);
|
||||||
j += sve_size;
|
j += sve_size;
|
||||||
pg = svwhilelt_b32(j, N);
|
pg = svwhilelt_b32((uint32_t)j, (uint32_t)N);
|
||||||
active = svcntp_b32(svptrue_b32(), pg);
|
active = svcntp_b32(svptrue_b32(), pg);
|
||||||
} while (svptest_any(svptrue_b32(), pg));
|
} while (svptest_any(svptrue_b32(), pg));
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -54,7 +55,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
svint64_t one_vec = svdup_s64(1LL);
|
svint64_t one_vec = svdup_s64(1LL);
|
||||||
|
|
||||||
int64_t j = 0;
|
int64_t j = 0;
|
||||||
svbool_t pg = svwhilelt_b64(j, n);
|
svbool_t pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
int64_t active = svcntp_b64(svptrue_b64(), pg);
|
int64_t active = svcntp_b64(svptrue_b64(), pg);
|
||||||
svint64_t index_neg = svindex_s64(0LL, -1LL);
|
svint64_t index_neg = svindex_s64(0LL, -1LL);
|
||||||
svint64_t index = svindex_s64(0LL, 1LL);
|
svint64_t index = svindex_s64(0LL, 1LL);
|
||||||
|
@ -80,7 +81,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
gat_ind = svadd_m(svnot_z(pg, cmp) , gat_ind, lda_vec);
|
gat_ind = svadd_m(svnot_z(pg, cmp) , gat_ind, lda_vec);
|
||||||
data_vec_imag = svneg_z(pg, data_vec_imag);
|
data_vec_imag = svneg_z(pg, data_vec_imag);
|
||||||
if (offset <= 0) {
|
if (offset <= 0) {
|
||||||
svbool_t off_g = svwhilelt_b64(offset, 0LL);
|
/* This runs only when offset <= 0, so the comparison has to stay signed:
   casting a negative offset to uint64_t selects the unsigned overload, where
   the value is never below 0 and no lane would be negated.  Signed casts
   still resolve the overload ambiguity this commit is addressing. */
svbool_t off_g = svwhilelt_b64((int64_t)offset, (int64_t)0LL);
|
||||||
data_vec_imag = svneg_m(data_vec_imag, off_g, data_vec_imag);
|
data_vec_imag = svneg_m(data_vec_imag, off_g, data_vec_imag);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -100,7 +101,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posX += sve_size;
|
posX += sve_size;
|
||||||
posX_vec = svdup_s64(posX);
|
posX_vec = svdup_s64(posX);
|
||||||
j += sve_size;
|
j += sve_size;
|
||||||
pg = svwhilelt_b64(j, n);
|
pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
active = svcntp_b64(svptrue_b64(), pg);
|
active = svcntp_b64(svptrue_b64(), pg);
|
||||||
} while (svptest_any(svptrue_b64(), pg));
|
} while (svptest_any(svptrue_b64(), pg));
|
||||||
#else
|
#else
|
||||||
|
@ -116,7 +117,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
|
|
||||||
int32_t j = 0;
|
int32_t j = 0;
|
||||||
int32_t N = n;
|
int32_t N = n;
|
||||||
svbool_t pg = svwhilelt_b32(j, N);
|
svbool_t pg = svwhilelt_b32((uint32_t)j, (uint32_t)N);
|
||||||
int32_t active = svcntp_b32(svptrue_b32(), pg);
|
int32_t active = svcntp_b32(svptrue_b32(), pg);
|
||||||
svint32_t index_neg = svindex_s32(0, -1);
|
svint32_t index_neg = svindex_s32(0, -1);
|
||||||
svint32_t index = svindex_s32(0, 1);
|
svint32_t index = svindex_s32(0, 1);
|
||||||
|
@ -142,7 +143,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
gat_ind = svadd_m(svnot_z(pg, cmp) , gat_ind, lda_vec);
|
gat_ind = svadd_m(svnot_z(pg, cmp) , gat_ind, lda_vec);
|
||||||
data_vec_imag = svneg_z(pg, data_vec_imag);
|
data_vec_imag = svneg_z(pg, data_vec_imag);
|
||||||
if (offset <= 0) {
|
if (offset <= 0) {
|
||||||
svbool_t off_g = svwhilelt_b32(offset, 0);
|
/* This runs only when offset <= 0, so the comparison has to stay signed:
   casting a negative offset to uint32_t selects the unsigned overload, where
   the value is never below 0 and no lane would be negated.  Signed casts
   still resolve the overload ambiguity this commit is addressing. */
svbool_t off_g = svwhilelt_b32((int32_t)offset, (int32_t)0);
|
||||||
data_vec_imag = svneg_m(data_vec_imag, off_g, data_vec_imag);
|
data_vec_imag = svneg_m(data_vec_imag, off_g, data_vec_imag);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -162,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posX += sve_size;
|
posX += sve_size;
|
||||||
posX_vec = svdup_s32(posX);
|
posX_vec = svdup_s32(posX);
|
||||||
j += sve_size;
|
j += sve_size;
|
||||||
pg = svwhilelt_b32(j, N);
|
pg = svwhilelt_b32((uint32_t)j, (uint32_t)N);
|
||||||
active = svcntp_b32(svptrue_b32(), pg);
|
active = svcntp_b32(svptrue_b32(), pg);
|
||||||
} while (svptest_any(svptrue_b32(), pg));
|
} while (svptest_any(svptrue_b32(), pg));
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -53,7 +54,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
svint64_t one_vec = svdup_s64(1LL);
|
svint64_t one_vec = svdup_s64(1LL);
|
||||||
|
|
||||||
int64_t j = 0;
|
int64_t j = 0;
|
||||||
svbool_t pg = svwhilelt_b64(j, n);
|
svbool_t pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
int64_t active = svcntp_b64(svptrue_b64(), pg);
|
int64_t active = svcntp_b64(svptrue_b64(), pg);
|
||||||
svint64_t index_neg = svindex_s64(0LL, -1LL);
|
svint64_t index_neg = svindex_s64(0LL, -1LL);
|
||||||
svint64_t index = svindex_s64(0LL, 1LL);
|
svint64_t index = svindex_s64(0LL, 1LL);
|
||||||
|
@ -90,7 +91,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posX += sve_size;
|
posX += sve_size;
|
||||||
posX_vec = svdup_s64(posX);
|
posX_vec = svdup_s64(posX);
|
||||||
j += sve_size;
|
j += sve_size;
|
||||||
pg = svwhilelt_b64(j, n);
|
pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
active = svcntp_b64(svptrue_b64(), pg);
|
active = svcntp_b64(svptrue_b64(), pg);
|
||||||
} while (svptest_any(svptrue_b64(), pg));
|
} while (svptest_any(svptrue_b64(), pg));
|
||||||
|
|
||||||
|
@ -103,7 +104,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
|
|
||||||
int32_t N = n;
|
int32_t N = n;
|
||||||
int32_t j = 0;
|
int32_t j = 0;
|
||||||
svbool_t pg = svwhilelt_b32(j, N);
|
svbool_t pg = svwhilelt_b32((uint32_t)j, (uint32_t)N);
|
||||||
int32_t active = svcntp_b32(svptrue_b32(), pg);
|
int32_t active = svcntp_b32(svptrue_b32(), pg);
|
||||||
svint32_t index_neg = svindex_s32(0, -1);
|
svint32_t index_neg = svindex_s32(0, -1);
|
||||||
svint32_t index = svindex_s32(0, 1);
|
svint32_t index = svindex_s32(0, 1);
|
||||||
|
@ -140,7 +141,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posX += sve_size;
|
posX += sve_size;
|
||||||
posX_vec = svdup_s32(posX);
|
posX_vec = svdup_s32(posX);
|
||||||
j += sve_size;
|
j += sve_size;
|
||||||
pg = svwhilelt_b32(j, N);
|
pg = svwhilelt_b32((uint32_t)j, (uint32_t)N);
|
||||||
active = svcntp_b32(svptrue_b32(), pg);
|
active = svcntp_b32(svptrue_b32(), pg);
|
||||||
} while (svptest_any(svptrue_b32(), pg));
|
} while (svptest_any(svptrue_b32(), pg));
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -53,7 +54,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
svint64_t one_vec = svdup_s64(1LL);
|
svint64_t one_vec = svdup_s64(1LL);
|
||||||
|
|
||||||
int64_t j = 0;
|
int64_t j = 0;
|
||||||
svbool_t pg = svwhilelt_b64(j, n);
|
svbool_t pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
int64_t active = svcntp_b64(svptrue_b64(), pg);
|
int64_t active = svcntp_b64(svptrue_b64(), pg);
|
||||||
svint64_t index_neg = svindex_s64(0LL, -1LL);
|
svint64_t index_neg = svindex_s64(0LL, -1LL);
|
||||||
svint64_t index = svindex_s64(0LL, 1LL);
|
svint64_t index = svindex_s64(0LL, 1LL);
|
||||||
|
@ -90,7 +91,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posX += sve_size;
|
posX += sve_size;
|
||||||
posX_vec = svdup_s64(posX);
|
posX_vec = svdup_s64(posX);
|
||||||
j += sve_size;
|
j += sve_size;
|
||||||
pg = svwhilelt_b64(j, n);
|
pg = svwhilelt_b64((uint64_t)j, (uint64_t)n);
|
||||||
active = svcntp_b64(svptrue_b64(), pg);
|
active = svcntp_b64(svptrue_b64(), pg);
|
||||||
} while (svptest_any(svptrue_b64(), pg));
|
} while (svptest_any(svptrue_b64(), pg));
|
||||||
|
|
||||||
|
@ -103,7 +104,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
|
|
||||||
int32_t N = n;
|
int32_t N = n;
|
||||||
int32_t j = 0;
|
int32_t j = 0;
|
||||||
svbool_t pg = svwhilelt_b32(j, N);
|
svbool_t pg = svwhilelt_b32((uint32_t)j, (uint32_t)N);
|
||||||
int32_t active = svcntp_b32(svptrue_b32(), pg);
|
int32_t active = svcntp_b32(svptrue_b32(), pg);
|
||||||
svint32_t index_neg = svindex_s32(0, -1);
|
svint32_t index_neg = svindex_s32(0, -1);
|
||||||
svint32_t index = svindex_s32(0, 1);
|
svint32_t index = svindex_s32(0, 1);
|
||||||
|
@ -140,7 +141,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posX += sve_size;
|
posX += sve_size;
|
||||||
posX_vec = svdup_s32(posX);
|
posX_vec = svdup_s32(posX);
|
||||||
j += sve_size;
|
j += sve_size;
|
||||||
pg = svwhilelt_b32(j, N);
|
pg = svwhilelt_b32((uint32_t)j, (uint32_t)N);
|
||||||
active = svcntp_b32(svptrue_b32(), pg);
|
active = svcntp_b32(svptrue_b32(), pg);
|
||||||
} while (svptest_any(svptrue_b32(), pg));
|
} while (svptest_any(svptrue_b32(), pg));
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -54,11 +55,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
FLOAT *ao;
|
FLOAT *ao;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
svint64_t index = svindex_s64(0LL, lda);
|
svint64_t index = svindex_s64(0LL, lda);
|
||||||
svbool_t pn = svwhilelt_b64(js, n);
|
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
#else
|
#else
|
||||||
svint32_t index = svindex_s32(0, lda);
|
svint32_t index = svindex_s32(0, lda);
|
||||||
svbool_t pn = svwhilelt_b32(js, n);
|
svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
#endif
|
#endif
|
||||||
do
|
do
|
||||||
|
@ -132,11 +133,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posY += n_active;
|
posY += n_active;
|
||||||
js += n_active;
|
js += n_active;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
pn = svwhilelt_b64(js, n);
|
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
} while (svptest_any(svptrue_b64(), pn));
|
} while (svptest_any(svptrue_b64(), pn));
|
||||||
#else
|
#else
|
||||||
pn = svwhilelt_b32(js, n);
|
pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
} while (svptest_any(svptrue_b32(), pn));
|
} while (svptest_any(svptrue_b32(), pn));
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -53,10 +54,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
FLOAT *ao;
|
FLOAT *ao;
|
||||||
js = 0;
|
js = 0;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
svbool_t pn = svwhilelt_b64(js, n);
|
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
#else
|
#else
|
||||||
svbool_t pn = svwhilelt_b32(js, n);
|
svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
#endif
|
#endif
|
||||||
do
|
do
|
||||||
|
@ -129,11 +130,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posY += n_active;
|
posY += n_active;
|
||||||
js += n_active;
|
js += n_active;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
pn = svwhilelt_b64(js, n);
|
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
} while (svptest_any(svptrue_b64(), pn));
|
} while (svptest_any(svptrue_b64(), pn));
|
||||||
#else
|
#else
|
||||||
pn = svwhilelt_b32(js, n);
|
pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
} while (svptest_any(svptrue_b32(), pn));
|
} while (svptest_any(svptrue_b32(), pn));
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -54,11 +55,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
FLOAT *ao;
|
FLOAT *ao;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
svint64_t index = svindex_s64(0LL, lda);
|
svint64_t index = svindex_s64(0LL, lda);
|
||||||
svbool_t pn = svwhilelt_b64(js, n);
|
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
#else
|
#else
|
||||||
svint32_t index = svindex_s32(0, lda);
|
svint32_t index = svindex_s32(0, lda);
|
||||||
svbool_t pn = svwhilelt_b32(js, n);
|
svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
#endif
|
#endif
|
||||||
do
|
do
|
||||||
|
@ -132,11 +133,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posY += n_active;
|
posY += n_active;
|
||||||
js += n_active;
|
js += n_active;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
pn = svwhilelt_b64(js, n);
|
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
} while (svptest_any(svptrue_b64(), pn));
|
} while (svptest_any(svptrue_b64(), pn));
|
||||||
#else
|
#else
|
||||||
pn = svwhilelt_b32(js, n);
|
pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
} while (svptest_any(svptrue_b32(), pn));
|
} while (svptest_any(svptrue_b32(), pn));
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -53,10 +54,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
FLOAT *ao;
|
FLOAT *ao;
|
||||||
js = 0;
|
js = 0;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
svbool_t pn = svwhilelt_b64(js, n);
|
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
#else
|
#else
|
||||||
svbool_t pn = svwhilelt_b32(js, n);
|
svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
#endif
|
#endif
|
||||||
do
|
do
|
||||||
|
@ -128,11 +129,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
|
||||||
posY += n_active;
|
posY += n_active;
|
||||||
js += n_active;
|
js += n_active;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
pn = svwhilelt_b64(js, n);
|
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
} while (svptest_any(svptrue_b64(), pn));
|
} while (svptest_any(svptrue_b64(), pn));
|
||||||
#else
|
#else
|
||||||
pn = svwhilelt_b32(js, n);
|
pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
} while (svptest_any(svptrue_b32(), pn));
|
} while (svptest_any(svptrue_b32(), pn));
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -52,13 +53,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
int64_t js = 0;
|
int64_t js = 0;
|
||||||
svint64_t index = svindex_s64(0LL, lda);
|
svint64_t index = svindex_s64(0LL, lda);
|
||||||
svbool_t pn = svwhilelt_b64(js, n);
|
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
#else
|
#else
|
||||||
int32_t N = n;
|
int32_t N = n;
|
||||||
int32_t js = 0;
|
int32_t js = 0;
|
||||||
svint32_t index = svindex_s32(0, lda);
|
svint32_t index = svindex_s32(0, lda);
|
||||||
svbool_t pn = svwhilelt_b32(js, N);
|
svbool_t pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
#endif
|
#endif
|
||||||
do {
|
do {
|
||||||
|
@ -106,11 +107,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
js += n_active;
|
js += n_active;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
pn = svwhilelt_b64(js, n);
|
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
} while (svptest_any(svptrue_b64(), pn));
|
} while (svptest_any(svptrue_b64(), pn));
|
||||||
#else
|
#else
|
||||||
pn = svwhilelt_b32(js, N);
|
pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
} while (svptest_any(svptrue_b32(), pn));
|
} while (svptest_any(svptrue_b32(), pn));
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -51,12 +52,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
jj = offset;
|
jj = offset;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
int64_t js = 0;
|
int64_t js = 0;
|
||||||
svbool_t pn = svwhilelt_b64(js, n);
|
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
#else
|
#else
|
||||||
int32_t N = n;
|
int32_t N = n;
|
||||||
int32_t js = 0;
|
int32_t js = 0;
|
||||||
svbool_t pn = svwhilelt_b32(js, N);
|
svbool_t pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
#endif
|
#endif
|
||||||
do {
|
do {
|
||||||
|
@ -102,11 +103,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
js += n_active;
|
js += n_active;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
pn = svwhilelt_b64(js, n);
|
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
} while (svptest_any(svptrue_b64(), pn));
|
} while (svptest_any(svptrue_b64(), pn));
|
||||||
#else
|
#else
|
||||||
pn = svwhilelt_b32(js, N);
|
pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
} while (svptest_any(svptrue_b32(), pn));
|
} while (svptest_any(svptrue_b32(), pn));
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -52,13 +53,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
int64_t js = 0;
|
int64_t js = 0;
|
||||||
svint64_t index = svindex_s64(0LL, lda);
|
svint64_t index = svindex_s64(0LL, lda);
|
||||||
svbool_t pn = svwhilelt_b64(js, n);
|
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
#else
|
#else
|
||||||
int32_t N = n;
|
int32_t N = n;
|
||||||
int32_t js = 0;
|
int32_t js = 0;
|
||||||
svint32_t index = svindex_s32(0, lda);
|
svint32_t index = svindex_s32(0, lda);
|
||||||
svbool_t pn = svwhilelt_b32(js, N);
|
svbool_t pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
#endif
|
#endif
|
||||||
do {
|
do {
|
||||||
|
@ -106,11 +107,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
js += n_active;
|
js += n_active;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
pn = svwhilelt_b64(js, n);
|
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
} while (svptest_any(svptrue_b64(), pn));
|
} while (svptest_any(svptrue_b64(), pn));
|
||||||
#else
|
#else
|
||||||
pn = svwhilelt_b32(js, N);
|
pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
} while (svptest_any(svptrue_b32(), pn));
|
} while (svptest_any(svptrue_b32(), pn));
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* Copyright 2023 The OpenBLAS Project */
|
||||||
/* All rights reserved. */
|
/* All rights reserved. */
|
||||||
/* */
|
/* */
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
@ -51,12 +52,12 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
jj = offset;
|
jj = offset;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
int64_t js = 0;
|
int64_t js = 0;
|
||||||
svbool_t pn = svwhilelt_b64(js, n);
|
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
int n_active = svcntp_b64(svptrue_b64(), pn);
|
int n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
#else
|
#else
|
||||||
int32_t N = n;
|
int32_t N = n;
|
||||||
int32_t js = 0;
|
int32_t js = 0;
|
||||||
svbool_t pn = svwhilelt_b32(js, N);
|
svbool_t pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
int n_active = svcntp_b32(svptrue_b32(), pn);
|
int n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
#endif
|
#endif
|
||||||
do {
|
do {
|
||||||
|
@ -102,11 +103,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
|
||||||
|
|
||||||
js += n_active;
|
js += n_active;
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
pn = svwhilelt_b64(js, n);
|
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
|
||||||
n_active = svcntp_b64(svptrue_b64(), pn);
|
n_active = svcntp_b64(svptrue_b64(), pn);
|
||||||
} while (svptest_any(svptrue_b64(), pn));
|
} while (svptest_any(svptrue_b64(), pn));
|
||||||
#else
|
#else
|
||||||
pn = svwhilelt_b32(js, N);
|
pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
|
||||||
n_active = svcntp_b32(svptrue_b32(), pn);
|
n_active = svcntp_b32(svptrue_b32(), pn);
|
||||||
} while (svptest_any(svptrue_b32(), pn));
|
} while (svptest_any(svptrue_b32(), pn));
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue