From 936966a42c1f2f0c63b49dc0a47e7e3039e520eb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 27 Aug 2020 10:59:08 +0200 Subject: [PATCH 01/13] Make ILAENV and xGETRF2 functions available --- relapack/src/lapack.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/relapack/src/lapack.h b/relapack/src/lapack.h index 776b0589f..9e9cdff7e 100644 --- a/relapack/src/lapack.h +++ b/relapack/src/lapack.h @@ -4,6 +4,13 @@ extern blasint LAPACK(lsame)(const char *, const char *); extern blasint LAPACK(xerbla)(const char *, const blasint *, int); +extern const blasint LAPACK(ilaenv)(const blasint *, const char*, const char*, const blasint* , int , int, int ); + +extern void LAPACK(sgetrf2)(const blasint *, const blasint *, float *, const blasint *, blasint *, blasint *); +extern void LAPACK(dgetrf2)(const blasint *, const blasint *, double *, const blasint *, blasint *, blasint *); +extern void LAPACK(cgetrf2)(const blasint *, const blasint *, float *, const blasint *, blasint *, blasint *); +extern void LAPACK(zgetrf2)(const blasint *, const blasint *, double *, const blasint *, blasint *, blasint *); + extern void LAPACK(slaswp)(const blasint *, float *, const blasint *, const blasint *, const blasint *, const blasint *, const blasint *); extern void LAPACK(dlaswp)(const blasint *, double *, const blasint *, const blasint *, const blasint *, const blasint *, const blasint *); extern void LAPACK(claswp)(const blasint *, float *, const blasint *, const blasint *, const blasint *, const blasint *, const blasint *); From 6797a3a1e0b3ad9f5df62e2b751c8d5ac50cbaf5 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 27 Aug 2020 11:15:12 +0200 Subject: [PATCH 02/13] Add early returns --- relapack/src/cgetrf.c | 9 +++++++-- relapack/src/chegst.c | 2 ++ relapack/src/chetrf_rook.c | 4 ++-- relapack/src/clauum.c | 2 ++ relapack/src/cpotrf.c | 3 +++ relapack/src/csytrf.c | 3 ++- relapack/src/csytrf_rook.c | 4 ++-- relapack/src/ctgsyl.c | 7 +++++++ relapack/src/ctrsyl.c | 5 +++++ relapack/src/ctrtri.c | 2 ++ 10 files changed, 34 insertions(+), 7 deletions(-) diff --git a/relapack/src/cgetrf.c b/relapack/src/cgetrf.c index 878c9ec15..bf9ca53f4 100644 --- a/relapack/src/cgetrf.c +++ b/relapack/src/cgetrf.c @@ -30,6 +30,8 @@ void RELAPACK_cgetrf( return; } + if (*m == 0 || *n == 0) return; + const blasint sn = MIN(*m, *n); RELAPACK_cgetrf_rec(m, &sn, A, ldA, ipiv, info); @@ -62,9 +64,11 @@ static void RELAPACK_cgetrf_rec( blasint *info ) { - if (*n <= MAX(CROSSOVER_CGETRF, 1)) { + if (*m == 0 || *n == 0) return; + + if ( *n <= MAX(CROSSOVER_CGETRF, 1)) { // Unblocked - LAPACK(cgetf2)(m, n, A, ldA, ipiv, info); + LAPACK(cgetrf2)(m, n, A, ldA, ipiv, info); return; } @@ -96,6 +100,7 @@ static void RELAPACK_cgetrf_rec( // recursion(A_L, ipiv_T) RELAPACK_cgetrf_rec(m, &n1, A_L, ldA, ipiv_T, info); + if (*info) return; // apply pivots to A_R LAPACK(claswp)(&n2, A_R, ldA, iONE, &n1, ipiv_T, iONE); diff --git a/relapack/src/chegst.c b/relapack/src/chegst.c index fe77b03ea..8557c2952 100644 --- a/relapack/src/chegst.c +++ b/relapack/src/chegst.c @@ -40,6 +40,8 @@ void RELAPACK_chegst( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; diff --git a/relapack/src/chetrf_rook.c b/relapack/src/chetrf_rook.c index 3d2fa3216..9ed1261cf 100644 --- a/relapack/src/chetrf_rook.c +++ b/relapack/src/chetrf_rook.c @@ -36,7 +36,7 @@ void RELAPACK_chetrf_rook( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -56,7 +56,7 @@ void RELAPACK_chetrf_rook( if (*info) { const blasint minfo = -*info; - LAPACK(xerbla)("CHETRF", &minfo, strlen("CHETRF")); + LAPACK(xerbla)("CHETRF_ROOK", &minfo, strlen("CHETRF_ROOK")); return; } diff --git a/relapack/src/clauum.c b/relapack/src/clauum.c index 2bc93f182..58a14e7da 100644 --- a/relapack/src/clauum.c +++ b/relapack/src/clauum.c @@ -32,6 +32,8 @@ void RELAPACK_clauum( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; diff --git a/relapack/src/cpotrf.c b/relapack/src/cpotrf.c index 0f8e7ebb0..db06c6fef 100644 --- a/relapack/src/cpotrf.c +++ b/relapack/src/cpotrf.c @@ -32,6 +32,8 @@ void RELAPACK_cpotrf( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; @@ -46,6 +48,7 @@ static void RELAPACK_cpotrf_rec( float *A, const blasint *ldA, blasint *info ){ + if (*n == 0) return; if (*n <= MAX(CROSSOVER_CPOTRF, 1)) { // Unblocked diff --git a/relapack/src/csytrf.c b/relapack/src/csytrf.c index 2ebc31001..807c91ece 100644 --- a/relapack/src/csytrf.c +++ b/relapack/src/csytrf.c @@ -36,7 +36,7 @@ void RELAPACK_csytrf( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -67,6 +67,7 @@ void RELAPACK_csytrf( blasint nout; // Recursive kernel +if (*n != 0) RELAPACK_csytrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info); #if XSYTRF_ALLOW_MALLOC diff --git a/relapack/src/csytrf_rook.c b/relapack/src/csytrf_rook.c index e8a9865cc..105c6b8b6 100644 --- a/relapack/src/csytrf_rook.c +++ b/relapack/src/csytrf_rook.c @@ -36,7 +36,7 @@ void RELAPACK_csytrf_rook( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -56,7 +56,7 @@ void RELAPACK_csytrf_rook( if (*info) { const blasint minfo = -*info; - LAPACK(xerbla)("CSYTRF", &minfo, strlen("CSYTRF")); + LAPACK(xerbla)("CSYTRF_ROOK", &minfo, strlen("CSYTRF_ROOK")); return; } diff --git a/relapack/src/ctgsyl.c b/relapack/src/ctgsyl.c index 704f3ef23..632bbc14e 100644 --- a/relapack/src/ctgsyl.c +++ b/relapack/src/ctgsyl.c @@ -68,6 +68,13 @@ void RELAPACK_ctgsyl( return; } + if ( *m == 0 || *n == 0) { + *scale = 1.; + if (notran && (*ijob != 0)) + *dif = 0.; + return; + } + // Clean char * arguments const char cleantrans = notran ? 'N' : 'C'; diff --git a/relapack/src/ctrsyl.c b/relapack/src/ctrsyl.c index fed6e847e..f7b841cb0 100644 --- a/relapack/src/ctrsyl.c +++ b/relapack/src/ctrsyl.c @@ -47,6 +47,11 @@ void RELAPACK_ctrsyl( return; } + if (*m == 0 || *n == 0) { + *scale = 1.; + return; + } + // Clean char * arguments const char cleantranA = notransA ? 'N' : 'C'; const char cleantranB = notransB ? 'N' : 'C'; diff --git a/relapack/src/ctrtri.c b/relapack/src/ctrtri.c index 5201a24c7..8d736007b 100644 --- a/relapack/src/ctrtri.c +++ b/relapack/src/ctrtri.c @@ -36,6 +36,8 @@ void RELAPACK_ctrtri( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; const char cleandiag = nounit ? 'N' : 'U'; From c9b67141f0827ccdfefd0197b6f0daba50f35dc2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 27 Aug 2020 11:20:31 +0200 Subject: [PATCH 03/13] Add early returns --- relapack/src/dgetrf.c | 12 ++++++------ relapack/src/dsytrf.c | 3 ++- relapack/src/dsytrf_rook.c | 4 ++-- relapack/src/dtrsyl.c | 5 +++++ relapack/src/zgetrf.c | 9 +++++++-- relapack/src/zhetrf_rook.c | 4 ++-- relapack/src/zsytrf.c | 3 ++- relapack/src/zsytrf_rook.c | 5 +++-- relapack/src/ztrsyl.c | 5 +++++ relapack/src/ztrtri.c | 4 ++-- 10 files changed, 36 insertions(+), 18 deletions(-) diff --git a/relapack/src/dgetrf.c b/relapack/src/dgetrf.c index be960fde9..3ebfb18d2 100644 --- a/relapack/src/dgetrf.c +++ b/relapack/src/dgetrf.c @@ -29,15 +29,16 @@ void RELAPACK_dgetrf( return; } - const blasint sn = MIN(*m, *n); + if (*m == 0 || *n == 0) return; + const blasint sn = MIN(*m, *n); RELAPACK_dgetrf_rec(m, &sn, A, ldA, ipiv, info); // Right remainder if (*m < *n) { // Constants const double ONE[] = { 1. }; - const blasint iONE[] = { 1. }; + const blasint iONE[] = { 1 }; // Splitting const blasint rn = *n - *m; @@ -60,13 +61,11 @@ static void RELAPACK_dgetrf_rec( double *A, const blasint *ldA, blasint *ipiv, blasint *info ) { - - if (*n <= MAX(CROSSOVER_DGETRF, 1)) { + if ( *n <= MAX(CROSSOVER_DGETRF, 1)) { // Unblocked - LAPACK(dgetf2)(m, n, A, ldA, ipiv, info); + LAPACK(dgetrf2)(m, n, A, ldA, ipiv, info); return; } - // Constants const double ONE[] = { 1. }; const double MONE[] = { -1. }; @@ -95,6 +94,7 @@ static void RELAPACK_dgetrf_rec( // recursion(A_L, ipiv_T) RELAPACK_dgetrf_rec(m, &n1, A_L, ldA, ipiv_T, info); + if (*info) return; // apply pivots to A_R LAPACK(dlaswp)(&n2, A_R, ldA, iONE, &n1, ipiv_T, iONE); diff --git a/relapack/src/dsytrf.c b/relapack/src/dsytrf.c index 43d28f94e..ba869ad11 100644 --- a/relapack/src/dsytrf.c +++ b/relapack/src/dsytrf.c @@ -36,7 +36,7 @@ void RELAPACK_dsytrf( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -67,6 +67,7 @@ void RELAPACK_dsytrf( blasint nout; // Recursive kernel +if (*n != 0) RELAPACK_dsytrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info); #if XSYTRF_ALLOW_MALLOC diff --git a/relapack/src/dsytrf_rook.c b/relapack/src/dsytrf_rook.c index 78fa652ab..fcdc2809f 100644 --- a/relapack/src/dsytrf_rook.c +++ b/relapack/src/dsytrf_rook.c @@ -36,7 +36,7 @@ void RELAPACK_dsytrf_rook( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork <1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -56,7 +56,7 @@ void RELAPACK_dsytrf_rook( if (*info) { const blasint minfo = -*info; - LAPACK(xerbla)("DSYTRF", &minfo, strlen("DSYTRF")); + LAPACK(xerbla)("DSYTRF_ROOK", &minfo, strlen("DSYTRF_ROOK")); return; } diff --git a/relapack/src/dtrsyl.c b/relapack/src/dtrsyl.c index 766377300..4948c4977 100644 --- a/relapack/src/dtrsyl.c +++ b/relapack/src/dtrsyl.c @@ -49,6 +49,11 @@ void RELAPACK_dtrsyl( return; } + if (*m == 0 || *n == 0) { + *scale = 1.; + return; + } + // Clean char * arguments const char cleantranA = notransA ? 'N' : (transA ? 'T' : 'C'); const char cleantranB = notransB ? 'N' : (transB ? 'T' : 'C'); diff --git a/relapack/src/zgetrf.c b/relapack/src/zgetrf.c index b0d14ffb1..8c3e8a8e8 100644 --- a/relapack/src/zgetrf.c +++ b/relapack/src/zgetrf.c @@ -30,6 +30,7 @@ void RELAPACK_zgetrf( return; } + if (*m == 0 || *n == 0) return; const blasint sn = MIN(*m, *n); RELAPACK_zgetrf_rec(m, &sn, A, ldA, ipiv, info); @@ -62,9 +63,11 @@ static void RELAPACK_zgetrf_rec( blasint *info ) { - if (*n <= MAX(CROSSOVER_ZGETRF, 1)) { + if (*m == 0 || *n == 0) return; + + if ( *n <= MAX(CROSSOVER_ZGETRF, 1)) { // Unblocked - LAPACK(zgetf2)(m, n, A, ldA, ipiv, info); + LAPACK(zgetrf2)(m, n, A, ldA, ipiv, info); return; } @@ -96,6 +99,8 @@ static void RELAPACK_zgetrf_rec( // recursion(A_L, ipiv_T) RELAPACK_zgetrf_rec(m, &n1, A_L, ldA, ipiv_T, info); +if (*info) return; + // apply pivots to A_R LAPACK(zlaswp)(&n2, A_R, ldA, iONE, &n1, ipiv_T, iONE); diff --git a/relapack/src/zhetrf_rook.c b/relapack/src/zhetrf_rook.c index 285aea96e..605e3a77f 100644 --- a/relapack/src/zhetrf_rook.c +++ b/relapack/src/zhetrf_rook.c @@ -36,7 +36,7 @@ void RELAPACK_zhetrf_rook( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -56,7 +56,7 @@ void RELAPACK_zhetrf_rook( if (*info) { const blasint minfo = -*info; - LAPACK(xerbla)("ZHETRF", &minfo, strlen("ZHETRF")); + LAPACK(xerbla)("ZHETRF_ROOK", &minfo, strlen("ZHETRF_ROOK")); return; } diff --git a/relapack/src/zsytrf.c b/relapack/src/zsytrf.c index f3412ad8f..59daba02f 100644 --- a/relapack/src/zsytrf.c +++ b/relapack/src/zsytrf.c @@ -36,7 +36,7 @@ void RELAPACK_zsytrf( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -67,6 +67,7 @@ void RELAPACK_zsytrf( blasint nout; // Recursive kernel + if (*n != 0) RELAPACK_zsytrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info); #if XSYTRF_ALLOW_MALLOC diff --git a/relapack/src/zsytrf_rook.c b/relapack/src/zsytrf_rook.c index fc6d73645..0fd8e7033 100644 --- a/relapack/src/zsytrf_rook.c +++ b/relapack/src/zsytrf_rook.c @@ -36,7 +36,7 @@ void RELAPACK_zsytrf_rook( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -56,7 +56,7 @@ void RELAPACK_zsytrf_rook( if (*info) { const blasint minfo = -*info; - LAPACK(xerbla)("ZSYTRF", &minfo, strlen("ZSYTRF")); + LAPACK(xerbla)("ZSYTRF_ROOK", &minfo, strlen("ZSYTRF_ROOK")); return; } @@ -67,6 +67,7 @@ void RELAPACK_zsytrf_rook( blasint nout; // Recursive kernel + if (*n != 0) RELAPACK_zsytrf_rook_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info); #if XSYTRF_ALLOW_MALLOC diff --git a/relapack/src/ztrsyl.c b/relapack/src/ztrsyl.c index 567ef115a..9d0107526 100644 --- a/relapack/src/ztrsyl.c +++ b/relapack/src/ztrsyl.c @@ -47,6 +47,11 @@ void RELAPACK_ztrsyl( return; } + if (*m == 0 || *n == 0) { + *scale = 1.; + return; + } + // Clean char * arguments const char cleantranA = notransA ? 'N' : 'C'; const char cleantranB = notransB ? 'N' : 'C'; diff --git a/relapack/src/ztrtri.c b/relapack/src/ztrtri.c index 3f6606d84..54854f525 100644 --- a/relapack/src/ztrtri.c +++ b/relapack/src/ztrtri.c @@ -69,8 +69,8 @@ static void RELAPACK_ztrtri_rec( } // Constants - const double ONE[] = { 1. }; - const double MONE[] = { -1. }; + const double ONE[] = { 1., 0. }; + const double MONE[] = { -1. , 0. }; // Splitting const blasint n1 = ZREC_SPLIT(*n); From d64cc2be8143225330bbc5b7877b155a1df3a90f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 27 Aug 2020 11:22:50 +0200 Subject: [PATCH 04/13] Add early returns --- relapack/src/sgetrf.c | 15 +++++++++++---- relapack/src/ssytrf.c | 3 ++- relapack/src/ssytrf_rook.c | 5 +++-- relapack/src/strsyl.c | 5 +++++ 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/relapack/src/sgetrf.c b/relapack/src/sgetrf.c index 0231cc166..a0c7015fd 100644 --- a/relapack/src/sgetrf.c +++ b/relapack/src/sgetrf.c @@ -14,7 +14,6 @@ void RELAPACK_sgetrf( float *A, const blasint *ldA, blasint *ipiv, blasint *info ) { - // Check arguments *info = 0; if (*m < 0) @@ -28,6 +27,9 @@ void RELAPACK_sgetrf( LAPACK(xerbla)("SGETRF", &minfo, strlen("SGETRF")); return; } + + if (*m == 0 || *n == 0) return; + const blasint sn = MIN(*m, *n); RELAPACK_sgetrf_rec(m, &sn, A, ldA, ipiv, info); @@ -35,7 +37,7 @@ void RELAPACK_sgetrf( if (*m < *n) { // Constants const float ONE[] = { 1. }; - const blasint iONE[] = { 1. }; + const blasint iONE[] = { 1 }; // Splitting const blasint rn = *n - *m; @@ -58,9 +60,12 @@ static void RELAPACK_sgetrf_rec( float *A, const blasint *ldA, blasint *ipiv, blasint *info ) { - if (*n <= MAX(CROSSOVER_SGETRF, 1)) { + + if (*m == 0 || *n == 0) return; + + if ( *n <= MAX(CROSSOVER_SGETRF, 1)) { // Unblocked - LAPACK(sgetf2)(m, n, A, ldA, ipiv, info); + LAPACK(sgetrf2)(m, n, A, ldA, ipiv, info); return; } @@ -91,6 +96,8 @@ static void RELAPACK_sgetrf_rec( // recursion(A_L, ipiv_T) RELAPACK_sgetrf_rec(m, &n1, A_L, ldA, ipiv_T, info); + if (*info) + return; // apply pivots to A_R LAPACK(slaswp)(&n2, A_R, ldA, iONE, &n1, ipiv_T, iONE); diff --git a/relapack/src/ssytrf.c b/relapack/src/ssytrf.c index 9fe7ce4a6..5f8e03391 100644 --- a/relapack/src/ssytrf.c +++ b/relapack/src/ssytrf.c @@ -35,7 +35,7 @@ void RELAPACK_ssytrf( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork <1 || *lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -66,6 +66,7 @@ void RELAPACK_ssytrf( blasint nout; // Recursive kernel +if (*n != 0) RELAPACK_ssytrf_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info); #if XSYTRF_ALLOW_MALLOC diff --git a/relapack/src/ssytrf_rook.c b/relapack/src/ssytrf_rook.c index abcf29d1c..b40f12271 100644 --- a/relapack/src/ssytrf_rook.c +++ b/relapack/src/ssytrf_rook.c @@ -36,7 +36,7 @@ void RELAPACK_ssytrf_rook( *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; - else if (*lWork < minlWork && *lWork != -1) + else if ((*lWork < 1 ||*lWork < minlWork) && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query @@ -56,7 +56,7 @@ void RELAPACK_ssytrf_rook( if (*info) { const blasint minfo = -*info; - LAPACK(xerbla)("SSYTRF", &minfo, strlen("SSYTRF")); + LAPACK(xerbla)("SSYTRF_ROOK", &minfo, strlen("SSYTRF_ROOK")); return; } @@ -67,6 +67,7 @@ void RELAPACK_ssytrf_rook( blasint nout; // Recursive kernel +if (*n != 0) RELAPACK_ssytrf_rook_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info); #if XSYTRF_ALLOW_MALLOC diff --git a/relapack/src/strsyl.c b/relapack/src/strsyl.c index 012fb3548..d85963fcc 100644 --- a/relapack/src/strsyl.c +++ b/relapack/src/strsyl.c @@ -49,6 +49,11 @@ void RELAPACK_strsyl( return; } + if (*m == 0 || *n == 0) { + *scale = 1.; + return; + } + // Clean char * arguments const char cleantranA = notransA ? 'N' : (transA ? 'T' : 'C'); const char cleantranB = notransB ? 'N' : (transB ? 'T' : 'C'); From de636757173680ba0a936588ca7b42cdf7ff6c9a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 27 Aug 2020 11:25:18 +0200 Subject: [PATCH 05/13] Add early returns and fix sign errors in workspace calculations --- relapack/src/cgbtrf.c | 11 ++++++----- relapack/src/cpbtrf.c | 10 ++++++---- relapack/src/dgbtrf.c | 5 ++++- relapack/src/dpbtrf.c | 10 ++++++---- relapack/src/sgbtrf.c | 16 ++++++++++++---- relapack/src/spbtrf.c | 13 +++++++++---- relapack/src/zgbtrf.c | 16 +++++++++++++++- relapack/src/zpbtrf.c | 11 +++++++---- 8 files changed, 65 insertions(+), 27 deletions(-) diff --git a/relapack/src/cgbtrf.c b/relapack/src/cgbtrf.c index 61332c6a6..e52f2e6c1 100644 --- a/relapack/src/cgbtrf.c +++ b/relapack/src/cgbtrf.c @@ -36,6 +36,7 @@ void RELAPACK_cgbtrf( return; } + if (*m == 0 || *n == 0) return; // Constant const float ZERO[] = { 0., 0. }; @@ -56,10 +57,10 @@ void RELAPACK_cgbtrf( // Allocate work space const blasint n1 = CREC_SPLIT(*n); - const blasint mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv; - const blasint nWorkl = (kv > n1) ? n1 : kv; - const blasint mWorku = (*kl > n1) ? n1 : *kl; - const blasint nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl; + const blasint mWorkl = abs ( (kv > n1) ? MAX(1, *m - *kl) : kv); + const blasint nWorkl = abs ( (kv > n1) ? n1 : kv); + const blasint mWorku = abs ((*kl > n1) ? n1 : *kl); + const blasint nWorku = abs ((*kl > n1) ? MAX(0, *n - *kl) : *kl); float *Workl = malloc(mWorkl * nWorkl * 2 * sizeof(float)); float *Worku = malloc(mWorku * nWorku * 2 * sizeof(float)); LAPACK(claset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl); @@ -82,7 +83,7 @@ static void RELAPACK_cgbtrf_rec( blasint *info ) { - if (*n <= MAX(CROSSOVER_CGBTRF, 1)) { + if (*n <= MAX(CROSSOVER_CGBTRF, 1)|| *n > *kl || *ldAb == 1) { // Unblocked LAPACK(cgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); return; diff --git a/relapack/src/cpbtrf.c b/relapack/src/cpbtrf.c index 971e547c6..a0fa13850 100644 --- a/relapack/src/cpbtrf.c +++ b/relapack/src/cpbtrf.c @@ -35,6 +35,8 @@ void RELAPACK_cpbtrf( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; @@ -43,8 +45,8 @@ void RELAPACK_cpbtrf( // Allocate work space const blasint n1 = CREC_SPLIT(*n); - const blasint mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd; - const blasint nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd; + const blasint mWork = abs((*kd > n1) ? (lower ? *n - *kd : n1) : *kd); + const blasint nWork = abs((*kd > n1) ? (lower ? n1 : *n - *kd) : *kd); float *Work = malloc(mWork * nWork * 2 * sizeof(float)); LAPACK(claset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); @@ -64,7 +66,7 @@ static void RELAPACK_cpbtrf_rec( blasint *info ){ - if (*n <= MAX(CROSSOVER_CPBTRF, 1)) { + if (*n <= MAX(CROSSOVER_CPBTRF, 1) || *ldAb==1) { // Unblocked LAPACK(cpbtf2)(uplo, n, kd, Ab, ldAb, info); return; @@ -148,7 +150,7 @@ static void RELAPACK_cpbtrf_rec( } // recursion(A_BR) - if (*kd > n1) + if (*kd > n1 && ldA != 0) RELAPACK_cpotrf(uplo, &n2, A_BR, ldA, info); else RELAPACK_cpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); diff --git a/relapack/src/dgbtrf.c b/relapack/src/dgbtrf.c index cdf06ad5b..aac10f251 100644 --- a/relapack/src/dgbtrf.c +++ b/relapack/src/dgbtrf.c @@ -36,6 +36,8 @@ void RELAPACK_dgbtrf( return; } + if (*m == 0 || *n == 0) return; + // Constant const double ZERO[] = { 0. }; @@ -83,7 +85,7 @@ static void RELAPACK_dgbtrf_rec( blasint *info ) { - if (*n <= MAX(CROSSOVER_DGBTRF, 1)) { + if (*n <= MAX(CROSSOVER_DGBTRF, 1) || *n > *kl || *ldAb == 1) { // Unblocked LAPACK(dgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); return; @@ -195,6 +197,7 @@ static void RELAPACK_dgbtrf_rec( // Worku = A_TRr LAPACK(dlacpy)("L", &m1, &n22, A_TRr, ldA, Worku, ldWorku); // Worku = A_TL \ Worku + if (ldWorku <= 0) return; BLAS(dtrsm)("L", "L", "N", "U", &m1, &n22, ONE, A_TL, ldA, Worku, ldWorku); // A_TRr = Worku LAPACK(dlacpy)("L", &m1, &n22, Worku, ldWorku, A_TRr, ldA); diff --git a/relapack/src/dpbtrf.c b/relapack/src/dpbtrf.c index 9380b28ad..94e9b80e2 100644 --- a/relapack/src/dpbtrf.c +++ b/relapack/src/dpbtrf.c @@ -35,6 +35,8 @@ void RELAPACK_dpbtrf( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; @@ -43,8 +45,8 @@ void RELAPACK_dpbtrf( // Allocate work space const blasint n1 = DREC_SPLIT(*n); - const blasint mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd; - const blasint nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd; + const blasint mWork = abs((*kd > n1) ? (lower ? *n - *kd : n1) : *kd); + const blasint nWork = abs((*kd > n1) ? (lower ? n1 : *n - *kd) : *kd); double *Work = malloc(mWork * nWork * sizeof(double)); LAPACK(dlaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); @@ -64,7 +66,7 @@ static void RELAPACK_dpbtrf_rec( blasint *info ){ - if (*n <= MAX(CROSSOVER_DPBTRF, 1)) { + if (*n <= MAX(CROSSOVER_DPBTRF, 1) || *ldAb == 1) { // Unblocked LAPACK(dpbtf2)(uplo, n, kd, Ab, ldAb, info); return; @@ -148,7 +150,7 @@ static void RELAPACK_dpbtrf_rec( } // recursion(A_BR) - if (*kd > n1) + if (*kd > n1 && ldA != 0) RELAPACK_dpotrf(uplo, &n2, A_BR, ldA, info); else RELAPACK_dpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); diff --git a/relapack/src/sgbtrf.c b/relapack/src/sgbtrf.c index 3e3fdf455..76e84e671 100644 --- a/relapack/src/sgbtrf.c +++ b/relapack/src/sgbtrf.c @@ -35,6 +35,13 @@ void RELAPACK_sgbtrf( return; } + if (*m == 0 || *n == 0) return; + + if (*ldAb == 1) { + LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); + return; + } + // Constant const float ZERO[] = { 0. }; @@ -82,8 +89,9 @@ static void RELAPACK_sgbtrf_rec( blasint *info ) { + if (*m == 0 || *n == 0) return; - if (*n <= MAX(CROSSOVER_SGBTRF, 1)) { + if ( *n <= MAX(CROSSOVER_SGBTRF, 1) || *n > *kl || *ldAb == 1) { // Unblocked LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); return; @@ -160,7 +168,7 @@ static void RELAPACK_sgbtrf_rec( // recursion(Ab_L, ipiv_T) RELAPACK_sgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info); - + if (*info) return; // Workl = A_BLb LAPACK(slacpy)("U", &m22, &n1, A_BLb, ldA, Workl, ldWorkl); @@ -222,8 +230,8 @@ static void RELAPACK_sgbtrf_rec( // recursion(Ab_BR, ipiv_B) //cause of infinite recursion here ? -// RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); - LAPACK(sgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info); + RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); +// LAPACK(sgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info); if (*info) *info += n1; // shift pivots diff --git a/relapack/src/spbtrf.c b/relapack/src/spbtrf.c index 26804dcc2..330276312 100644 --- a/relapack/src/spbtrf.c +++ b/relapack/src/spbtrf.c @@ -35,6 +35,9 @@ void RELAPACK_spbtrf( return; } + + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; @@ -43,8 +46,8 @@ void RELAPACK_spbtrf( // Allocate work space const blasint n1 = SREC_SPLIT(*n); - const blasint mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd; - const blasint nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd; + const blasint mWork = abs( (*kd > n1) ? (lower ? *n - *kd : n1) : *kd); + const blasint nWork = abs((*kd > n1) ? (lower ? n1 : *n - *kd) : *kd); float *Work = malloc(mWork * nWork * sizeof(float)); LAPACK(slaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); @@ -64,7 +67,9 @@ static void RELAPACK_spbtrf_rec( blasint *info ){ - if (*n <= MAX(CROSSOVER_SPBTRF, 1)) { + if (*n == 0 ) return; + + if ( *n <= MAX(CROSSOVER_SPBTRF, 1) || *ldAb == 1) { // Unblocked LAPACK(spbtf2)(uplo, n, kd, Ab, ldAb, info); return; @@ -148,7 +153,7 @@ static void RELAPACK_spbtrf_rec( } // recursion(A_BR) - if (*kd > n1) + if (*kd > n1 && ldA != 0) RELAPACK_spotrf(uplo, &n2, A_BR, ldA, info); else RELAPACK_spbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); diff --git a/relapack/src/zgbtrf.c b/relapack/src/zgbtrf.c index d4ba41753..5d7dfd3c7 100644 --- a/relapack/src/zgbtrf.c +++ b/relapack/src/zgbtrf.c @@ -36,6 +36,8 @@ void RELAPACK_zgbtrf( return; } + if (*m == 0 || *n == 0) return; + // Constant const double ZERO[] = { 0., 0. }; @@ -82,7 +84,7 @@ static void RELAPACK_zgbtrf_rec( blasint *info ) { - if (*n <= MAX(CROSSOVER_ZGBTRF, 1)) { + if (*n <= MAX(CROSSOVER_ZGBTRF, 1) || *n > *kl || *ldAb == 1) { // Unblocked LAPACK(zgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); return; @@ -92,6 +94,7 @@ static void RELAPACK_zgbtrf_rec( const double ONE[] = { 1., 0. }; const double MONE[] = { -1., 0. }; const blasint iONE[] = { 1 }; + const blasint min11 = -11; // Loop iterators blasint i, j; @@ -158,6 +161,7 @@ static void RELAPACK_zgbtrf_rec( // recursion(Ab_L, ipiv_T) RELAPACK_zgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info); +if (*info) return; // Workl = A_BLb LAPACK(zlacpy)("U", &m22, &n1, A_BLb, ldA, Workl, ldWorkl); @@ -193,11 +197,21 @@ static void RELAPACK_zgbtrf_rec( } // A_TRl = A_TL \ A_TRl + if (*ldA < MAX(1,m1)) { + LAPACK(xerbla)("ZGBTRF", &min11, strlen("ZGBTRF")); + return; + } else { BLAS(ztrsm)("L", "L", "N", "U", &m1, &n21, ONE, A_TL, ldA, A_TRl, ldA); + } // Worku = A_TRr LAPACK(zlacpy)("L", &m1, &n22, A_TRr, ldA, Worku, ldWorku); // Worku = A_TL \ Worku + if (*ldWorku < MAX(1,m1)) { + LAPACK(xerbla)("ZGBTRF", &min11, strlen("ZGBTRF")); + return; + } else { BLAS(ztrsm)("L", "L", "N", "U", &m1, &n22, ONE, A_TL, ldA, Worku, ldWorku); + } // A_TRr = Worku LAPACK(zlacpy)("L", &m1, &n22, Worku, ldWorku, A_TRr, ldA); // A_BRtl = A_BRtl - A_BLt * A_TRl diff --git a/relapack/src/zpbtrf.c b/relapack/src/zpbtrf.c index fb0e1e97b..8b094380c 100644 --- a/relapack/src/zpbtrf.c +++ b/relapack/src/zpbtrf.c @@ -35,6 +35,8 @@ void RELAPACK_zpbtrf( return; } + if (*n == 0) return; + // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; @@ -43,9 +45,10 @@ void RELAPACK_zpbtrf( // Allocate work space const blasint n1 = ZREC_SPLIT(*n); - const blasint mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd; - const blasint nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd; + const blasint mWork = abs((*kd > n1) ? (lower ? *n - *kd : n1) : *kd); + const blasint nWork = abs((*kd > n1) ? (lower ? n1 : *n - *kd) : *kd); double *Work = malloc(mWork * nWork * 2 * sizeof(double)); + LAPACK(zlaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); // Recursive kernel @@ -64,7 +67,7 @@ static void RELAPACK_zpbtrf_rec( blasint *info ){ - if (*n <= MAX(CROSSOVER_ZPBTRF, 1)) { + if (*n <= MAX(CROSSOVER_ZPBTRF, 1) || *ldAb == 1) { // Unblocked LAPACK(zpbtf2)(uplo, n, kd, Ab, ldAb, info); return; @@ -148,7 +151,7 @@ static void RELAPACK_zpbtrf_rec( } // recursion(A_BR) - if (*kd > n1) + if (*kd > n1 && ldA != 0) RELAPACK_zpotrf(uplo, &n2, A_BR, ldA, info); else RELAPACK_zpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); From 68b1713c300ac152d1efcb3c02f0c59fafcd39e1 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 1 Sep 2020 17:19:14 +0200 Subject: [PATCH 06/13] Merge pull request #2811 from martin-frbg/issue2806 Make NO_AVX512 option override the AVX512 compile test in CMAKE builds as well --- cmake/system.cmake | 5 +++++ cmake/system_check.cmake | 2 ++ 2 files changed, 7 insertions(+) diff --git a/cmake/system.cmake b/cmake/system.cmake index e3617c4e2..c0f3c6ed2 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -110,6 +110,11 @@ if (NO_AVX2) set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2") endif () +if (NO_AVX512) + message(STATUS "Disabling Advanced Vector Extensions 512 (AVX512).") + set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX512") +endif () + if (CMAKE_BUILD_TYPE STREQUAL "Debug") set(GETARCH_FLAGS "${GETARCH_FLAGS} ${CMAKE_C_FLAGS_DEBUG}") endif () diff --git a/cmake/system_check.cmake b/cmake/system_check.cmake index 511a7c7d1..d06f4779f 100644 --- a/cmake/system_check.cmake +++ b/cmake/system_check.cmake @@ -109,6 +109,7 @@ else() endif() if (X86_64 OR X86) +if (NOT NO_AVX512) file(WRITE ${PROJECT_BINARY_DIR}/avx512.c "#include \n\nint main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }") execute_process(COMMAND ${CMAKE_C_COMPILER} -march=skylake-avx512 -c -v -o ${PROJECT_BINARY_DIR}/avx512.o ${PROJECT_BINARY_DIR}/avx512.c OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_AVX512) if (NO_AVX512 EQUAL 1) @@ -116,6 +117,7 @@ set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512") endif() file(REMOVE "avx512.c" "avx512.o") endif() +endif() include(CheckIncludeFile) CHECK_INCLUDE_FILE("stdatomic.h" HAVE_C11) From e4900caa1180e9b13766a97e708992f9df61b1a1 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 1 Sep 2020 19:54:08 +0200 Subject: [PATCH 07/13] Fix c_check misinterpreting arm64 in uname output to mean armv7 additionla fix for upcoming OSX on ARM64 related to #2804, as suggested by fxcoudert in #2805 --- c_check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_check b/c_check index 314c2b157..5ea93b75c 100644 --- a/c_check +++ b/c_check @@ -8,7 +8,7 @@ $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); $hostarch = `uname -p` if ($hostos eq "AIX"); $hostarch = "x86_64" if ($hostarch eq "amd64"); -$hostarch = "arm" if ($hostarch =~ /^arm.*/); +$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/); $hostarch = "arm64" if ($hostarch eq "aarch64"); $hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/); $hostarch = "zarch" if ($hostarch eq "s390x"); From 60ef193258f580115640794e0c867ef45cb16974 Mon Sep 17 00:00:00 2001 From: Marius Hillenbrand Date: Tue, 1 Sep 2020 13:59:06 +0200 Subject: [PATCH 08/13] s390x: use "lghi" for immediate values to fix build with clang Some of the kernels written in assembly utilize a "load address" instruction for loading an immediate value into a register. That is both unnecessarily complex and LLVM's assembler does not understand that specific syntax. Thus, replace with the appropriate "load immediate" instruction, which is also clearer to read. Signed-off-by: Marius Hillenbrand --- kernel/zarch/ctrmm4x4V.S | 18 +++++++++--------- kernel/zarch/gemm8x4V.S | 24 ++++++++++++------------ kernel/zarch/strmm8x4V.S | 24 ++++++++++++------------ kernel/zarch/ztrmm4x4V.S | 18 +++++++++--------- 4 files changed, 42 insertions(+), 42 deletions(-) diff --git a/kernel/zarch/ctrmm4x4V.S b/kernel/zarch/ctrmm4x4V.S index c0e4df17d..123f2ead0 100644 --- a/kernel/zarch/ctrmm4x4V.S +++ b/kernel/zarch/ctrmm4x4V.S @@ -198,7 +198,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,4 nill LOCAL_VAR1,3 #else - la LOCAL_VAR1,3(0,0) + lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x4_BK_Store @@ -254,7 +254,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x4_BK_Store @@ -305,7 +305,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x4_BK_Store @@ -385,7 +385,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x2_BK_Store @@ -442,7 +442,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x2_BK_Store @@ -492,7 +492,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x2_BK_Store @@ -568,7 +568,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x1_BK_Store @@ -620,7 +620,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x1_BK_Store @@ -670,7 +670,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x1_BK_Store diff --git a/kernel/zarch/gemm8x4V.S b/kernel/zarch/gemm8x4V.S index 27fd5f57b..633e60ea6 100644 --- a/kernel/zarch/gemm8x4V.S +++ b/kernel/zarch/gemm8x4V.S @@ -147,7 +147,7 @@ brctg LOCAL_VAR1,.L8x4_4_BK ALIGN_4 .L8x4_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L8x4_BK_Store @@ -183,7 +183,7 @@ brctg LOCAL_VAR1,.L4x4_4_BK ALIGN_4 .L4x4_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L4x4_BK_Store @@ -217,7 +217,7 @@ brctg LOCAL_VAR1,.L2x4_4_BK ALIGN_4 .L2x4_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L2x4_BK_Store @@ -252,7 +252,7 @@ brctg LOCAL_VAR1,.L1x4_4_BK ALIGN_4 .L1x4_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L1x4_BK_Store @@ -309,7 +309,7 @@ brctg LOCAL_VAR1,.L8x2_4_BK ALIGN_4 .L8x2_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L8x2_BK_Store @@ -346,7 +346,7 @@ brctg LOCAL_VAR1,.L4x2_4_BK ALIGN_4 .L4x2_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L4x2_BK_Store @@ -380,7 +380,7 @@ brctg LOCAL_VAR1,.L2x2_4_BK ALIGN_4 .L2x2_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L2x2_BK_Store @@ -415,7 +415,7 @@ brctg LOCAL_VAR1,.L1x2_4_BK ALIGN_4 .L1x2_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L1x2_BK_Store @@ -471,7 +471,7 @@ brctg LOCAL_VAR1,.L8x1_4_BK ALIGN_4 .L8x1_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L8x1_BK_Store @@ -508,7 +508,7 @@ brctg LOCAL_VAR1,.L4x1_4_BK ALIGN_4 .L4x1_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L4x1_BK_Store @@ -542,7 +542,7 @@ brctg LOCAL_VAR1,.L2x1_4_BK ALIGN_4 .L2x1_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L2x1_BK_Store @@ -577,7 +577,7 @@ brctg LOCAL_VAR1,.L1x1_4_BK ALIGN_4 .L1x1_mod: -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ jz .L1x1_BK_Store diff --git a/kernel/zarch/strmm8x4V.S b/kernel/zarch/strmm8x4V.S index f8e748167..e34a7a05a 100644 --- a/kernel/zarch/strmm8x4V.S +++ b/kernel/zarch/strmm8x4V.S @@ -186,7 +186,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,8,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L8x4_BK_Store @@ -239,7 +239,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,4 nill LOCAL_VAR1,3 #else - la LOCAL_VAR1,3(0,0) + lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x4_BK_Store @@ -290,7 +290,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x4_BK_Store @@ -341,7 +341,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x4_BK_Store @@ -423,7 +423,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,8,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L8x2_BK_Store @@ -475,7 +475,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x2_BK_Store @@ -525,7 +525,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x2_BK_Store @@ -575,7 +575,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x2_BK_Store @@ -655,7 +655,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,8,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L8x1_BK_Store @@ -708,7 +708,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x1_BK_Store @@ -757,7 +757,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x1_BK_Store @@ -807,7 +807,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x1_BK_Store diff --git a/kernel/zarch/ztrmm4x4V.S b/kernel/zarch/ztrmm4x4V.S index 52ee15f06..6fd7f2509 100644 --- a/kernel/zarch/ztrmm4x4V.S +++ b/kernel/zarch/ztrmm4x4V.S @@ -196,7 +196,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,4 nill LOCAL_VAR1,3 #else - la LOCAL_VAR1,3(0,0) + lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x4_BK_Store @@ -256,7 +256,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x4_BK_Store @@ -307,7 +307,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,4 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x4_BK_Store @@ -390,7 +390,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x2_BK_Store @@ -447,7 +447,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x2_BK_Store @@ -497,7 +497,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,2 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x2_BK_Store @@ -573,7 +573,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,4,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L4x1_BK_Store @@ -625,7 +625,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,2,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L2x1_BK_Store @@ -675,7 +675,7 @@ ALIGN_4 RefreshTempBk LOCAL_VAR1,BK,OFF,1,1 nill LOCAL_VAR1,3 #else -la LOCAL_VAR1,3(0,0) +lghi LOCAL_VAR1,3 NGR LOCAL_VAR1,BK /*refresh BK*/ #endif jz .L1x1_BK_Store From a1616a0b8653fb06d607c5f8efafa01b0106dded Mon Sep 17 00:00:00 2001 From: Marius Hillenbrand Date: Tue, 1 Sep 2020 11:58:48 +0200 Subject: [PATCH 09/13] s390x: replace nop with "nop 0" in inline assembly ... as a bandaid for building with clang until LLVM's internal assembler supports nops without operand. Signed-off-by: Marius Hillenbrand --- kernel/zarch/dgemv_n_4.c | 6 +++--- kernel/zarch/dgemv_t_4.c | 2 +- kernel/zarch/icamax.c | 2 +- kernel/zarch/icamin.c | 2 +- kernel/zarch/idamax.c | 2 +- kernel/zarch/idamin.c | 2 +- kernel/zarch/idmax.c | 2 +- kernel/zarch/idmin.c | 2 +- kernel/zarch/isamax.c | 2 +- kernel/zarch/isamin.c | 2 +- kernel/zarch/ismax.c | 2 +- kernel/zarch/ismin.c | 2 +- kernel/zarch/izamax.c | 2 +- kernel/zarch/izamin.c | 2 +- kernel/zarch/sgemv_n_4.c | 6 +++--- kernel/zarch/sgemv_t_4.c | 2 +- 16 files changed, 20 insertions(+), 20 deletions(-) diff --git a/kernel/zarch/dgemv_n_4.c b/kernel/zarch/dgemv_n_4.c index 502ba837e..b2a3d1e8d 100644 --- a/kernel/zarch/dgemv_n_4.c +++ b/kernel/zarch/dgemv_n_4.c @@ -169,7 +169,7 @@ static void dgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "agfi %%r1,32\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" + "nop 0" : "+m"(*(struct { FLOAT x[n]; } *) y) : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), @@ -274,7 +274,7 @@ static void dgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "agfi %%r1,32\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" + "nop 0" : "+m"(*(struct { FLOAT x[n]; } *) y) : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), @@ -351,7 +351,7 @@ static void dgemv_kernel_4x1(BLASLONG n, FLOAT *a0, FLOAT *x, FLOAT *y, "agfi %%r1,32\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" + "nop 0" : "+m"(*(struct { FLOAT x[n]; } *) y) : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) a0),[a0] "a"(a0), "m"(*(const FLOAT (*)[1]) x),[x] "a"(x),[alpha] "Q"(*alpha), diff --git a/kernel/zarch/dgemv_t_4.c b/kernel/zarch/dgemv_t_4.c index de72a1798..30cec14f7 100644 --- a/kernel/zarch/dgemv_t_4.c +++ b/kernel/zarch/dgemv_t_4.c @@ -438,7 +438,7 @@ static void add_y_kernel_4(BLASLONG n, FLOAT da, FLOAT *src, FLOAT *dest) { "agfi %%r1,32\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" + "nop 0" : "+m"(*(struct { FLOAT x[n]; } *) dest) : [dest] "a"(dest),[da] "Q"(da), "m"(*(const struct { FLOAT x[n]; } *) src), [src] "a"(src),[n] "r"(n) diff --git a/kernel/zarch/icamax.c b/kernel/zarch/icamax.c index a2546b812..2d5c48407 100644 --- a/kernel/zarch/icamax.c +++ b/kernel/zarch/icamax.c @@ -213,7 +213,7 @@ static BLASLONG icamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax) { "ste %%f0,%[amax]\n\t" "vlgvg %[iamax],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamax] "=r"(iamax),[amax] "=Q"(*amax),[n] "+&r"(n) : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", diff --git a/kernel/zarch/icamin.c b/kernel/zarch/icamin.c index 09654b742..1d51bb2c2 100644 --- a/kernel/zarch/icamin.c +++ b/kernel/zarch/icamin.c @@ -213,7 +213,7 @@ static BLASLONG icamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin) { "ste %%f0,%[amin]\n\t" "vlgvg %[iamin],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamin] "=r"(iamin),[amin] "=Q"(*amin),[n] "+&r"(n) : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", diff --git a/kernel/zarch/idamax.c b/kernel/zarch/idamax.c index b292c1d15..f9bfe3494 100644 --- a/kernel/zarch/idamax.c +++ b/kernel/zarch/idamax.c @@ -160,7 +160,7 @@ static BLASLONG idamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax) { "std %%f0,%[amax]\n\t" "vlgvg %[iamax],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamax] "=r"(iamax),[amax] "=Q"(*amax),[n] "+&r"(n) : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", diff --git a/kernel/zarch/idamin.c b/kernel/zarch/idamin.c index f9a8119e1..b7ce70027 100644 --- a/kernel/zarch/idamin.c +++ b/kernel/zarch/idamin.c @@ -160,7 +160,7 @@ static BLASLONG idamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin) { "std %%f0,%[amin]\n\t" "vlgvg %[iamin],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamin] "=r"(iamin),[amin] "=Q"(*amin),[n] "+&r"(n) : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", diff --git a/kernel/zarch/idmax.c b/kernel/zarch/idmax.c index 8f283bc17..55471ce50 100644 --- a/kernel/zarch/idmax.c +++ b/kernel/zarch/idmax.c @@ -140,7 +140,7 @@ static BLASLONG idmax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *max) { "std %%f0,%[max]\n\t" "vlgvg %[imax],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [imax] "=r"(imax),[max] "=Q"(*max),[n] "+&r"(n) : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", diff --git a/kernel/zarch/idmin.c b/kernel/zarch/idmin.c index e4b7bb4fe..ec1c69822 100644 --- a/kernel/zarch/idmin.c +++ b/kernel/zarch/idmin.c @@ -140,7 +140,7 @@ static BLASLONG idmin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *min) { "std %%f0,%[min]\n\t" "vlgvg %[imin],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [imin] "=r"(imin),[min] "=Q"(*min),[n] "+&r"(n) : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", diff --git a/kernel/zarch/isamax.c b/kernel/zarch/isamax.c index ac86435d7..6ea46c716 100644 --- a/kernel/zarch/isamax.c +++ b/kernel/zarch/isamax.c @@ -204,7 +204,7 @@ static BLASLONG isamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amax) { "ste %%f0,%[amax]\n\t" "vlgvg %[iamax],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamax] "=r"(iamax),[amax] "=Q"(*amax),[n] "+&r"(n) : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v4", "v5", "v6", "v7", "v8", "v16", diff --git a/kernel/zarch/isamin.c b/kernel/zarch/isamin.c index 3f2d039eb..18cfa2a6e 100644 --- a/kernel/zarch/isamin.c +++ b/kernel/zarch/isamin.c @@ -204,7 +204,7 @@ static BLASLONG isamin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amin) { "ste %%f0,%[amin]\n\t" "vlgvg %[iamin],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamin] "=r"(iamin),[amin] "=Q"(*amin),[n] "+&r"(n) : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v4", "v5", "v6", "v7", "v8", "v16", diff --git a/kernel/zarch/ismax.c b/kernel/zarch/ismax.c index 41172c1bd..be990b9d5 100644 --- a/kernel/zarch/ismax.c +++ b/kernel/zarch/ismax.c @@ -184,7 +184,7 @@ static BLASLONG ismax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *max) { "ste %%f0,%[max]\n\t" "vlgvg %[imax],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [imax] "=r"(imax),[max] "=Q"(*max),[n] "+&r"(n) : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v4", "v5", "v6", "v7", "v8", "v16", diff --git a/kernel/zarch/ismin.c b/kernel/zarch/ismin.c index e2684df41..a27c8a743 100644 --- a/kernel/zarch/ismin.c +++ b/kernel/zarch/ismin.c @@ -184,7 +184,7 @@ static BLASLONG ismin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *min) { "ste %%f0,%[min]\n\t" "vlgvg %[imin],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [imin] "=r"(imin),[min] "=Q"(*min),[n] "+&r"(n) : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v4", "v5", "v6", "v7", "v8", "v16", diff --git a/kernel/zarch/izamax.c b/kernel/zarch/izamax.c index daca1d6f7..cb299cb24 100644 --- a/kernel/zarch/izamax.c +++ b/kernel/zarch/izamax.c @@ -157,7 +157,7 @@ static BLASLONG izamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *amax) { "std %%f0,%[amax]\n\t" "vlgvg %[iamax],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamax] "=r"(iamax),[amax] "=Q"(*amax),[n] "+&r"(n) : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17", "v18", diff --git a/kernel/zarch/izamin.c b/kernel/zarch/izamin.c index 9ababb91f..4dfa1a9db 100644 --- a/kernel/zarch/izamin.c +++ b/kernel/zarch/izamin.c @@ -157,7 +157,7 @@ static BLASLONG izamin_kernel_16(BLASLONG n, FLOAT *x, FLOAT *amin) { "std %%f0,%[amin]\n\t" "vlgvg %[iamin],%%v1,0\n\t" "2:\n\t" - "nop" + "nop 0" : [iamin] "=r"(iamin),[amin] "=Q"(*amin),[n] "+&r"(n) : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17", "v18", diff --git a/kernel/zarch/sgemv_n_4.c b/kernel/zarch/sgemv_n_4.c index a1efef373..a0d522b83 100644 --- a/kernel/zarch/sgemv_n_4.c +++ b/kernel/zarch/sgemv_n_4.c @@ -159,7 +159,7 @@ static void sgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "agfi %%r1,16\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" + "nop 0" : "+m"(*(struct { FLOAT x[n]; } *) y) : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), @@ -258,7 +258,7 @@ static void sgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "agfi %%r1,16\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" + "nop 0" : "+m"(*(struct { FLOAT x[n]; } *) y) : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), @@ -331,7 +331,7 @@ static void sgemv_kernel_4x1(BLASLONG n, FLOAT *a0, FLOAT *x, FLOAT *y, "agfi %%r1,16\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" + "nop 0" : "+m"(*(struct { FLOAT x[n]; } *) y) : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) a0),[a0] "a"(a0), "m"(*(const FLOAT (*)[1]) x),[x] "a"(x),[alpha] "Q"(*alpha), diff --git a/kernel/zarch/sgemv_t_4.c b/kernel/zarch/sgemv_t_4.c index 81d7c9fe7..81e600695 100644 --- a/kernel/zarch/sgemv_t_4.c +++ b/kernel/zarch/sgemv_t_4.c @@ -431,7 +431,7 @@ static void add_y_kernel_4(BLASLONG n, FLOAT da, FLOAT *src, FLOAT *dest) { "agfi %%r1,16\n\t" "brctg %%r0,2b\n\t" "3:\n\t" - "nop" + "nop 0" : "+m"(*(struct { FLOAT x[n]; } *) dest) : [dest] "a"(dest),[da] "Q"(da), "m"(*(const struct { FLOAT x[n]; } *) src), [src] "a"(src),[n] "r"(n) From b9b3265ec8a78762263f54944e35c849013e0cab Mon Sep 17 00:00:00 2001 From: Marius Hillenbrand Date: Tue, 1 Sep 2020 12:04:28 +0200 Subject: [PATCH 10/13] s390x: avoid inline assembly for vector loads for clang ... since clang does not support the instruction format for inline assembly and also it is not required for current versions of clang. Signed-off-by: Marius Hillenbrand --- kernel/zarch/gemm_vec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/zarch/gemm_vec.c b/kernel/zarch/gemm_vec.c index 741c09431..b7d7cc04b 100644 --- a/kernel/zarch/gemm_vec.c +++ b/kernel/zarch/gemm_vec.c @@ -172,7 +172,7 @@ static inline vector_float vec_load_hinted(FLOAT const *restrict a) { vector_float const *restrict addr = (vector_float const *restrict)a; vector_float y; -#if __GNUC__ < 9 +#if __GNUC__ < 9 && !defined(__clang__) // hex-encode vl %[out],%[addr],3 asm(".insn vrx,0xe70000003006,%[out],%[addr],3" : [ out ] "=v"(y) From 87e5bbd88795d09f4bec0691d33f91e8109eb424 Mon Sep 17 00:00:00 2001 From: Marius Hillenbrand Date: Tue, 1 Sep 2020 12:08:05 +0200 Subject: [PATCH 11/13] s390x: avoid variable-length arrays in struct for asm operands ... since it is not required and clang does not support that gcc extension. Instead, use a variable-length array directly for these operands. Note that, while the actual inline assembly code does not directly use these memory operands, they serve to inform the compiler that it cannot reorder reads or writes to/from the input and output data across the inline asm statements. Signed-off-by: Marius Hillenbrand --- kernel/zarch/camax.c | 2 +- kernel/zarch/camin.c | 2 +- kernel/zarch/casum.c | 2 +- kernel/zarch/caxpy.c | 6 +++--- kernel/zarch/ccopy.c | 4 ++-- kernel/zarch/cdot.c | 6 +++--- kernel/zarch/cgemv_n_4.c | 30 +++++++++++++++--------------- kernel/zarch/cgemv_t_4.c | 32 ++++++++++++++++---------------- kernel/zarch/crot.c | 4 ++-- kernel/zarch/cscal.c | 14 +++++++------- kernel/zarch/csum.c | 2 +- kernel/zarch/cswap.c | 4 ++-- kernel/zarch/damax.c | 2 +- kernel/zarch/damax_z13.c | 2 +- kernel/zarch/damin.c | 2 +- kernel/zarch/damin_z13.c | 2 +- kernel/zarch/dasum.c | 2 +- kernel/zarch/daxpy.c | 4 ++-- kernel/zarch/dcopy.c | 4 ++-- kernel/zarch/ddot.c | 4 ++-- kernel/zarch/dgemv_n_4.c | 24 ++++++++++++------------ kernel/zarch/dgemv_t_4.c | 28 ++++++++++++++-------------- kernel/zarch/dmax.c | 2 +- kernel/zarch/dmax_z13.c | 2 +- kernel/zarch/dmin.c | 2 +- kernel/zarch/dmin_z13.c | 2 +- kernel/zarch/drot.c | 2 +- kernel/zarch/dscal.c | 4 ++-- kernel/zarch/dsdot.c | 4 ++-- kernel/zarch/dsum.c | 2 +- kernel/zarch/dswap.c | 2 +- kernel/zarch/icamax.c | 2 +- kernel/zarch/icamin.c | 2 +- kernel/zarch/idamax.c | 2 +- kernel/zarch/idamin.c | 2 +- kernel/zarch/idmax.c | 2 +- kernel/zarch/idmin.c | 2 +- kernel/zarch/isamax.c | 2 +- kernel/zarch/isamin.c | 2 +- kernel/zarch/ismax.c | 2 +- kernel/zarch/ismin.c | 2 +- kernel/zarch/izamax.c | 2 +- kernel/zarch/izamin.c | 2 +- kernel/zarch/samax.c | 2 +- kernel/zarch/samin.c | 2 +- kernel/zarch/sasum.c | 2 +- kernel/zarch/saxpy.c | 4 ++-- kernel/zarch/scopy.c | 4 ++-- kernel/zarch/sdot.c | 4 ++-- kernel/zarch/sgemv_n_4.c | 24 ++++++++++++------------ kernel/zarch/sgemv_t_4.c | 28 ++++++++++++++-------------- kernel/zarch/smax.c | 2 +- kernel/zarch/smin.c | 2 +- kernel/zarch/srot.c | 2 +- kernel/zarch/sscal.c | 4 ++-- kernel/zarch/ssum.c | 2 +- kernel/zarch/sswap.c | 2 +- kernel/zarch/zamax.c | 2 +- kernel/zarch/zamax_z13.c | 2 +- kernel/zarch/zamin.c | 2 +- kernel/zarch/zamin_z13.c | 2 +- kernel/zarch/zasum.c | 2 +- kernel/zarch/zaxpy.c | 6 +++--- kernel/zarch/zcopy.c | 4 ++-- kernel/zarch/zdot.c | 6 +++--- kernel/zarch/zgemv_n_4.c | 30 +++++++++++++++--------------- kernel/zarch/zgemv_t_4.c | 32 ++++++++++++++++---------------- kernel/zarch/zrot.c | 4 ++-- kernel/zarch/zscal.c | 14 +++++++------- kernel/zarch/zsum.c | 2 +- kernel/zarch/zswap.c | 4 ++-- 71 files changed, 212 insertions(+), 212 deletions(-) diff --git a/kernel/zarch/camax.c b/kernel/zarch/camax.c index b10ca4752..018a9a9c0 100644 --- a/kernel/zarch/camax.c +++ b/kernel/zarch/camax.c @@ -136,7 +136,7 @@ static FLOAT camax_kernel_32(BLASLONG n, FLOAT *x) { "wfmaxsb %%v0,%%v0,%%v16,0\n\t" "ler %[amax],%%f0" : [amax] "=f"(amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/camin.c b/kernel/zarch/camin.c index 40945fae8..7b3b36630 100644 --- a/kernel/zarch/camin.c +++ b/kernel/zarch/camin.c @@ -136,7 +136,7 @@ static FLOAT camin_kernel_32(BLASLONG n, FLOAT *x) { "wfminsb %%v0,%%v0,%%v16,0\n\t" "ler %[amin],%%f0" : [amin] "=f"(amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/casum.c b/kernel/zarch/casum.c index e28f2018c..f3b9ed628 100644 --- a/kernel/zarch/casum.c +++ b/kernel/zarch/casum.c @@ -108,7 +108,7 @@ static FLOAT casum_kernel_32(BLASLONG n, FLOAT *x) { "vfasb %%v24,%%v24,%%v25\n\t" "vstef %%v24,%[asum],0" : [asum] "=Q"(asum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/caxpy.c b/kernel/zarch/caxpy.c index 14a124ae2..c0a7a71f4 100644 --- a/kernel/zarch/caxpy.c +++ b/kernel/zarch/caxpy.c @@ -99,9 +99,9 @@ static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) { "vst %%v19,112(%%r1,%[y])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/ccopy.c b/kernel/zarch/ccopy.c index 0a5e03992..9e08edc3b 100644 --- a/kernel/zarch/ccopy.c +++ b/kernel/zarch/ccopy.c @@ -36,9 +36,9 @@ static void ccopy_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y) { "la %[x],256(%[x])\n\t" "la %[y],256(%[y])\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n * 2]; } *) y),[x] "+&a"(x),[y] "+&a"(y), + : "=m"(*(FLOAT (*)[n * 2]) y),[x] "+&a"(x),[y] "+&a"(y), [n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x) + : "m"(*(const FLOAT (*)[n * 2]) x) : "cc"); } diff --git a/kernel/zarch/cdot.c b/kernel/zarch/cdot.c index d90f9c871..0d6dfbeb1 100644 --- a/kernel/zarch/cdot.c +++ b/kernel/zarch/cdot.c @@ -97,9 +97,9 @@ static void cdot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d) { "vstef %%v24,4(%[d]),1\n\t" "vstef %%v25,8(%[d]),1\n\t" "vstef %%v25,12(%[d]),0" - : "=m"(*(struct { FLOAT x[4]; } *) d),[n] "+&r"(n) - : [d] "a"(d), "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[n * 2]; } *) y),[y] "a"(y) + : "=m"(*(FLOAT (*)[4]) d),[n] "+&r"(n) + : [d] "a"(d), "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[n * 2]) y),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/cgemv_n_4.c b/kernel/zarch/cgemv_n_4.c index 5c36bc338..5fdf7717e 100644 --- a/kernel/zarch/cgemv_n_4.c +++ b/kernel/zarch/cgemv_n_4.c @@ -146,12 +146,12 @@ static void cgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vst %%v0,0(%%r1,%[y])\n\t" "agfi %%r1,16\n\t" "brctg %[n],0b\n\t" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[8]; } *) x),[x] "a"(x) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n * 2]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n * 2]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[8]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -238,10 +238,10 @@ static void cgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vst %%v0,0(%%r1,%[y])\n\t" "agfi %%r1,16\n\t" "brctg %[n],0b\n\t" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[4]; } *) x),[x] "a"(x) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[4]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); } @@ -307,9 +307,9 @@ static void cgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y) { "vst %%v0,0(%%r1,%[y])\n\t" "agfi %%r1,16\n\t" "brctg %[n],0b\n\t" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap),[ap] "a"(ap), - "m"(*(const struct { FLOAT x[2]; } *) x),[x] "a"(x) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap),[ap] "a"(ap), + "m"(*(const FLOAT (*)[2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v16", "v17", "v18", "v19"); } @@ -350,8 +350,8 @@ static void add_y_4(BLASLONG n, FLOAT *src, FLOAT *dest, FLOAT alpha_r, "vst %%v23,16(%%r1,%[dest])\n\t" "agfi %%r1,32\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) dest),[n] "+&r"(n) - : [dest] "a"(dest), "m"(*(const struct { FLOAT x[n * 2]; } *) src), + : "+m"(*(FLOAT (*)[n * 2]) dest),[n] "+&r"(n) + : [dest] "a"(dest), "m"(*(const FLOAT (*)[n * 2]) src), [src] "a"(src),[alpha_r] "Q"(alpha_r),[alpha_i] "Q"(alpha_i) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); diff --git a/kernel/zarch/cgemv_t_4.c b/kernel/zarch/cgemv_t_4.c index e10edfab0..2bdac9ea1 100644 --- a/kernel/zarch/cgemv_t_4.c +++ b/kernel/zarch/cgemv_t_4.c @@ -159,13 +159,13 @@ static void cgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "vfmasb %%v23,%%v19,%%v21,%%v23\n\t" "vst %%v22,0(%[y])\n\t" "vst %%v23,16(%[y])" - : "+m"(*(struct { FLOAT x[8]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[8]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n * 2]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n * 2]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -271,11 +271,11 @@ static void cgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "vfmasb %%v20,%%v16,%%v18,%%v20\n\t" "vfmasb %%v20,%%v17,%%v19,%%v20\n\t" "vst %%v20,0(%[y])" - : "+m"(*(struct { FLOAT x[4]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[4]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); } @@ -361,10 +361,10 @@ static void cgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, "vfmasb %%v0,%%v16,%%v18,%%v0\n\t" "vfmasb %%v0,%%v17,%%v19,%%v0\n\t" "vsteg %%v0,0(%[y]),0" - : "+m"(*(struct { FLOAT x[2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap),[ap] "a"(ap), - "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap),[ap] "a"(ap), + "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); } diff --git a/kernel/zarch/crot.c b/kernel/zarch/crot.c index aab155f8b..5a0990f3d 100644 --- a/kernel/zarch/crot.c +++ b/kernel/zarch/crot.c @@ -169,8 +169,8 @@ static void crot_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *c, FLOAT *s) { "vst %%v23, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x), - "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) + : "+m"(*(FLOAT (*)[n * 2]) x), + "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) : [x] "a"(x),[y] "a"(y),[c] "Q"(*c),[s] "Q"(*s) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", diff --git a/kernel/zarch/cscal.c b/kernel/zarch/cscal.c index 9fc54cf29..f9e89a452 100644 --- a/kernel/zarch/cscal.c +++ b/kernel/zarch/cscal.c @@ -80,8 +80,8 @@ static void cscal_kernel_16(BLASLONG n, FLOAT *alpha, FLOAT *x) { "vst %%v23,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) - : [x] "a"(x), "m"(*(const struct { FLOAT x[2]; } *) alpha), + : "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) + : [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha), [alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", @@ -132,8 +132,8 @@ static void cscal_kernel_16_zero_r(BLASLONG n, FLOAT *alpha, FLOAT *x) { "vst %%v23,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) - : [x] "a"(x), "m"(*(const struct { FLOAT x[2]; } *) alpha), + : "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) + : [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha), [alpha] "a"(alpha) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); @@ -171,8 +171,8 @@ static void cscal_kernel_16_zero_i(BLASLONG n, FLOAT *alpha, FLOAT *x) { "vst %%v23,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) - : [x] "a"(x), "m"(*(const struct { FLOAT x[2]; } *) alpha), + : "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) + : [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha), [alpha] "a"(alpha) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); @@ -194,7 +194,7 @@ static void cscal_kernel_16_zero(BLASLONG n, FLOAT *x) { "vst %%v0,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) + : "=m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) : [x] "a"(x) : "cc", "r1", "v0"); } diff --git a/kernel/zarch/csum.c b/kernel/zarch/csum.c index e9413da8e..b076501aa 100644 --- a/kernel/zarch/csum.c +++ b/kernel/zarch/csum.c @@ -90,7 +90,7 @@ static FLOAT csum_kernel_32(BLASLONG n, FLOAT *x) { "vfasb %%v24,%%v24,%%v25\n\t" "vstef %%v24,%[sum],0" : [sum] "=Q"(sum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/cswap.c b/kernel/zarch/cswap.c index 198994e18..f3ab77ab5 100644 --- a/kernel/zarch/cswap.c +++ b/kernel/zarch/cswap.c @@ -99,8 +99,8 @@ static void cswap_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y) { "vst %%v31, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x), - "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) + : "+m"(*(FLOAT (*)[n * 2]) x), + "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) : [x] "a"(x),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", diff --git a/kernel/zarch/damax.c b/kernel/zarch/damax.c index caacb50dc..d19181cbe 100644 --- a/kernel/zarch/damax.c +++ b/kernel/zarch/damax.c @@ -76,7 +76,7 @@ static FLOAT damax_kernel_32(BLASLONG n, FLOAT *x) { "wfmaxdb %%v0,%%v0,%%v16,8\n\t" "lpdr %[amax],%%f0" : [amax] "=f"(amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/damax_z13.c b/kernel/zarch/damax_z13.c index f3db4c108..5bc0d1721 100644 --- a/kernel/zarch/damax_z13.c +++ b/kernel/zarch/damax_z13.c @@ -110,7 +110,7 @@ static FLOAT damax_kernel_32(BLASLONG n, FLOAT *x) { "vsel %%v0,%%v0,%%v16,%%v17\n\t" "ldr %[amax],%%f0" : [amax] "=f"(amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/damin.c b/kernel/zarch/damin.c index 0163a144b..4e0558af4 100644 --- a/kernel/zarch/damin.c +++ b/kernel/zarch/damin.c @@ -76,7 +76,7 @@ static FLOAT damin_kernel_32(BLASLONG n, FLOAT *x) { "wfmindb %%v0,%%v0,%%v16,8\n\t" "lpdr %[amin],%%f0" : [amin] "=f"(amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/damin_z13.c b/kernel/zarch/damin_z13.c index 4196b2e15..a7efd4b26 100644 --- a/kernel/zarch/damin_z13.c +++ b/kernel/zarch/damin_z13.c @@ -110,7 +110,7 @@ static FLOAT damin_kernel_32(BLASLONG n, FLOAT *x) { "vsel %%v0,%%v0,%%v16,%%v17\n\t" "ldr %[amin],%%f0" : [amin] "=f"(amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dasum.c b/kernel/zarch/dasum.c index aa1382b10..9703cd3be 100644 --- a/kernel/zarch/dasum.c +++ b/kernel/zarch/dasum.c @@ -106,7 +106,7 @@ static FLOAT dasum_kernel_32(BLASLONG n, FLOAT *x) { "vfadb %%v24,%%v24,%%v25\n\t" "vsteg %%v24,%[asum],0" : [asum] "=Q"(asum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/daxpy.c b/kernel/zarch/daxpy.c index 5b0208c20..4e59ef7c6 100644 --- a/kernel/zarch/daxpy.c +++ b/kernel/zarch/daxpy.c @@ -100,8 +100,8 @@ static void daxpy_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) { "vst %%v27,240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x), + : "+m"(*(FLOAT (*)[n]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) x),[x] "a"(x), [alpha] "Q"(*alpha) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dcopy.c b/kernel/zarch/dcopy.c index 691b90c64..3c546568f 100644 --- a/kernel/zarch/dcopy.c +++ b/kernel/zarch/dcopy.c @@ -36,8 +36,8 @@ static void dcopy_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y) { "la %[x],256(%[x])\n\t" "la %[y],256(%[y])\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n]; } *) y),[x] "+&a"(x),[y] "+&a"(y),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x) + : "=m"(*(FLOAT (*)[n]) y),[x] "+&a"(x),[y] "+&a"(y),[n] "+&r"(n) + : "m"(*(const FLOAT (*)[n]) x) : "cc"); } diff --git a/kernel/zarch/ddot.c b/kernel/zarch/ddot.c index 9cad68f4b..c0ed8b72e 100644 --- a/kernel/zarch/ddot.c +++ b/kernel/zarch/ddot.c @@ -80,8 +80,8 @@ static FLOAT ddot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y) { "adbr %%f0,%%f1\n\t" "ldr %[dot],%%f0" : [dot] "=f"(dot),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[n]; } *) y),[y] "a"(y) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[n]) y),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dgemv_n_4.c b/kernel/zarch/dgemv_n_4.c index b2a3d1e8d..e1c5c4472 100644 --- a/kernel/zarch/dgemv_n_4.c +++ b/kernel/zarch/dgemv_n_4.c @@ -170,12 +170,12 @@ static void dgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "brctg %%r0,2b\n\t" "3:\n\t" "nop 0" - : "+m"(*(struct { FLOAT x[n]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[4]; } *) x),[x] "a"(x),[alpha] "Q"(*alpha), + : "+m"(*(FLOAT (*)[n]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[4]) x),[x] "a"(x),[alpha] "Q"(*alpha), [n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", @@ -275,10 +275,10 @@ static void dgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "brctg %%r0,2b\n\t" "3:\n\t" "nop 0" - : "+m"(*(struct { FLOAT x[n]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[2]; } *) x),[x] "a"(x),[alpha] "Q"(*alpha), + : "+m"(*(FLOAT (*)[n]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[2]) x),[x] "a"(x),[alpha] "Q"(*alpha), [n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -352,8 +352,8 @@ static void dgemv_kernel_4x1(BLASLONG n, FLOAT *a0, FLOAT *x, FLOAT *y, "brctg %%r0,2b\n\t" "3:\n\t" "nop 0" - : "+m"(*(struct { FLOAT x[n]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) a0),[a0] "a"(a0), + : "+m"(*(FLOAT (*)[n]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) a0),[a0] "a"(a0), "m"(*(const FLOAT (*)[1]) x),[x] "a"(x),[alpha] "Q"(*alpha), [n] "r"(n) : "cc", "r0", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", diff --git a/kernel/zarch/dgemv_t_4.c b/kernel/zarch/dgemv_t_4.c index 30cec14f7..513cffe5a 100644 --- a/kernel/zarch/dgemv_t_4.c +++ b/kernel/zarch/dgemv_t_4.c @@ -173,12 +173,12 @@ static void dgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vrepg %%v4,%%v3,1\n\t" "adbr %%f3,%%f4\n\t" "std %%f3,24(%[y])" - : "=m"(*(struct { FLOAT x[4]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x),[n] "r"(n) + : "=m"(*(FLOAT (*)[4]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[n]) x),[x] "a"(x),[n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -280,10 +280,10 @@ static void dgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vrepg %%v2,%%v1,1\n\t" "adbr %%f1,%%f2\n\t" "std %%f1,8(%[y])" - : "=m"(*(struct { FLOAT x[2]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x),[n] "r"(n) + : "=m"(*(FLOAT (*)[2]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n]) x),[x] "a"(x),[n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -360,8 +360,8 @@ static void dgemv_kernel_4x1(BLASLONG n, FLOAT *a0, FLOAT *x, FLOAT *y) { "adbr %%f0,%%f1\n\t" "std %%f0,0(%[y])" : "=m"(*(FLOAT (*)[1]) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) a0),[a0] "a"(a0), - "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x),[n] "r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) a0),[a0] "a"(a0), + "m"(*(const FLOAT (*)[n]) x),[x] "a"(x),[n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -439,8 +439,8 @@ static void add_y_kernel_4(BLASLONG n, FLOAT da, FLOAT *src, FLOAT *dest) { "brctg %%r0,2b\n\t" "3:\n\t" "nop 0" - : "+m"(*(struct { FLOAT x[n]; } *) dest) - : [dest] "a"(dest),[da] "Q"(da), "m"(*(const struct { FLOAT x[n]; } *) src), + : "+m"(*(FLOAT (*)[n]) dest) + : [dest] "a"(dest),[da] "Q"(da), "m"(*(const FLOAT (*)[n]) src), [src] "a"(src),[n] "r"(n) : "cc", "r0", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", diff --git a/kernel/zarch/dmax.c b/kernel/zarch/dmax.c index cdc8d5d08..4b76e0dd6 100644 --- a/kernel/zarch/dmax.c +++ b/kernel/zarch/dmax.c @@ -73,7 +73,7 @@ static FLOAT dmax_kernel_32(BLASLONG n, FLOAT *x) { "wfmaxdb %%v0,%%v0,%%v16,0\n\t" "ldr %[max],%%f0" : [max] "=f"(max),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dmax_z13.c b/kernel/zarch/dmax_z13.c index c4e8d91f8..93acee2db 100644 --- a/kernel/zarch/dmax_z13.c +++ b/kernel/zarch/dmax_z13.c @@ -90,7 +90,7 @@ static FLOAT dmax_kernel_32(BLASLONG n, FLOAT *x) { "vsel %%v0,%%v0,%%v16,%%v17\n\t" "ldr %[max],%%f0" : [max] "=f"(max),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dmin.c b/kernel/zarch/dmin.c index f9b129cbd..21d55f323 100644 --- a/kernel/zarch/dmin.c +++ b/kernel/zarch/dmin.c @@ -73,7 +73,7 @@ static FLOAT dmin_kernel_32(BLASLONG n, FLOAT *x) { "wfmindb %%v0,%%v0,%%v16,0\n\t" "ldr %[min],%%f0" : [min] "=f"(min),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dmin_z13.c b/kernel/zarch/dmin_z13.c index 77f021c1d..7d2dae3fb 100644 --- a/kernel/zarch/dmin_z13.c +++ b/kernel/zarch/dmin_z13.c @@ -90,7 +90,7 @@ static FLOAT dmin_kernel_32(BLASLONG n, FLOAT *x) { "vsel %%v0,%%v0,%%v16,%%v17\n\t" "ldr %[min],%%f0" : [min] "=f"(min),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/drot.c b/kernel/zarch/drot.c index 11fbe15b6..9d6d1a80d 100644 --- a/kernel/zarch/drot.c +++ b/kernel/zarch/drot.c @@ -169,7 +169,7 @@ static void drot_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *c, FLOAT *s) { "vst %%v23, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) x), "+m"(*(struct { FLOAT x[n]; } *) y), + : "+m"(*(FLOAT (*)[n]) x), "+m"(*(FLOAT (*)[n]) y), [n] "+&r"(n) : [x] "a"(x),[y] "a"(y),[c] "Q"(*c),[s] "Q"(*s) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", diff --git a/kernel/zarch/dscal.c b/kernel/zarch/dscal.c index 2961eff20..a5a5e3468 100644 --- a/kernel/zarch/dscal.c +++ b/kernel/zarch/dscal.c @@ -59,7 +59,7 @@ static void dscal_kernel_16(BLASLONG n, FLOAT da, FLOAT *x) { "vst %%v31,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) x),[n] "+&r"(n) + : "+m"(*(FLOAT (*)[n]) x),[n] "+&r"(n) : [x] "a"(x),[da] "Q"(da) : "cc", "r1", "v0", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -81,7 +81,7 @@ static void dscal_kernel_16_zero(BLASLONG n, FLOAT *x) { "vst %%v0,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n]; } *) x),[n] "+&r"(n) + : "=m"(*(FLOAT (*)[n]) x),[n] "+&r"(n) : [x] "a"(x) : "cc", "r1", "v0"); } diff --git a/kernel/zarch/dsdot.c b/kernel/zarch/dsdot.c index 5fa88c3b9..2952bcf42 100644 --- a/kernel/zarch/dsdot.c +++ b/kernel/zarch/dsdot.c @@ -112,8 +112,8 @@ static double dsdot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y) { "adbr %%f0,%%f1\n\t" "ldr %[dot],%%f0" : [dot] "=f"(dot),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[n]; } *) y),[y] "a"(y) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[n]) y),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dsum.c b/kernel/zarch/dsum.c index 8d44873c0..69b9f9b41 100644 --- a/kernel/zarch/dsum.c +++ b/kernel/zarch/dsum.c @@ -88,7 +88,7 @@ static FLOAT dsum_kernel_32(BLASLONG n, FLOAT *x) { "vfadb %%v24,%%v24,%%v25\n\t" "vsteg %%v24,%[sum],0" : [sum] "=Q"(sum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/dswap.c b/kernel/zarch/dswap.c index f0c9ded51..46cbbba23 100644 --- a/kernel/zarch/dswap.c +++ b/kernel/zarch/dswap.c @@ -99,7 +99,7 @@ static void dswap_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y) { "vst %%v31, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) x), "+m"(*(struct { FLOAT x[n]; } *) y), + : "+m"(*(FLOAT (*)[n]) x), "+m"(*(FLOAT (*)[n]) y), [n] "+&r"(n) : [x] "a"(x),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", diff --git a/kernel/zarch/icamax.c b/kernel/zarch/icamax.c index 2d5c48407..459196d00 100644 --- a/kernel/zarch/icamax.c +++ b/kernel/zarch/icamax.c @@ -215,7 +215,7 @@ static BLASLONG icamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax) { "2:\n\t" "nop 0" : [iamax] "=r"(iamax),[amax] "=Q"(*amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/icamin.c b/kernel/zarch/icamin.c index 1d51bb2c2..9bcf3646b 100644 --- a/kernel/zarch/icamin.c +++ b/kernel/zarch/icamin.c @@ -215,7 +215,7 @@ static BLASLONG icamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin) { "2:\n\t" "nop 0" : [iamin] "=r"(iamin),[amin] "=Q"(*amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/idamax.c b/kernel/zarch/idamax.c index f9bfe3494..0f53488d3 100644 --- a/kernel/zarch/idamax.c +++ b/kernel/zarch/idamax.c @@ -162,7 +162,7 @@ static BLASLONG idamax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amax) { "2:\n\t" "nop 0" : [iamax] "=r"(iamax),[amax] "=Q"(*amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/idamin.c b/kernel/zarch/idamin.c index b7ce70027..f48bde894 100644 --- a/kernel/zarch/idamin.c +++ b/kernel/zarch/idamin.c @@ -162,7 +162,7 @@ static BLASLONG idamin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *amin) { "2:\n\t" "nop 0" : [iamin] "=r"(iamin),[amin] "=Q"(*amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/idmax.c b/kernel/zarch/idmax.c index 55471ce50..1fdf1fa02 100644 --- a/kernel/zarch/idmax.c +++ b/kernel/zarch/idmax.c @@ -142,7 +142,7 @@ static BLASLONG idmax_kernel_32(BLASLONG n, FLOAT *x, FLOAT *max) { "2:\n\t" "nop 0" : [imax] "=r"(imax),[max] "=Q"(*max),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/idmin.c b/kernel/zarch/idmin.c index ec1c69822..282f26bbd 100644 --- a/kernel/zarch/idmin.c +++ b/kernel/zarch/idmin.c @@ -142,7 +142,7 @@ static BLASLONG idmin_kernel_32(BLASLONG n, FLOAT *x, FLOAT *min) { "2:\n\t" "nop 0" : [imin] "=r"(imin),[min] "=Q"(*min),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/isamax.c b/kernel/zarch/isamax.c index 6ea46c716..a30a96412 100644 --- a/kernel/zarch/isamax.c +++ b/kernel/zarch/isamax.c @@ -206,7 +206,7 @@ static BLASLONG isamax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amax) { "2:\n\t" "nop 0" : [iamax] "=r"(iamax),[amax] "=Q"(*amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT(*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v4", "v5", "v6", "v7", "v8", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/isamin.c b/kernel/zarch/isamin.c index 18cfa2a6e..b29027ff4 100644 --- a/kernel/zarch/isamin.c +++ b/kernel/zarch/isamin.c @@ -206,7 +206,7 @@ static BLASLONG isamin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *amin) { "2:\n\t" "nop 0" : [iamin] "=r"(iamin),[amin] "=Q"(*amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v4", "v5", "v6", "v7", "v8", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/ismax.c b/kernel/zarch/ismax.c index be990b9d5..3d751ff6b 100644 --- a/kernel/zarch/ismax.c +++ b/kernel/zarch/ismax.c @@ -186,7 +186,7 @@ static BLASLONG ismax_kernel_64(BLASLONG n, FLOAT *x, FLOAT *max) { "2:\n\t" "nop 0" : [imax] "=r"(imax),[max] "=Q"(*max),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v4", "v5", "v6", "v7", "v8", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/ismin.c b/kernel/zarch/ismin.c index a27c8a743..e57c0bfa6 100644 --- a/kernel/zarch/ismin.c +++ b/kernel/zarch/ismin.c @@ -186,7 +186,7 @@ static BLASLONG ismin_kernel_64(BLASLONG n, FLOAT *x, FLOAT *min) { "2:\n\t" "nop 0" : [imin] "=r"(imin),[min] "=Q"(*min),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v4", "v5", "v6", "v7", "v8", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/izamax.c b/kernel/zarch/izamax.c index cb299cb24..fda76f471 100644 --- a/kernel/zarch/izamax.c +++ b/kernel/zarch/izamax.c @@ -159,7 +159,7 @@ static BLASLONG izamax_kernel_16(BLASLONG n, FLOAT *x, FLOAT *amax) { "2:\n\t" "nop 0" : [iamax] "=r"(iamax),[amax] "=Q"(*amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27"); diff --git a/kernel/zarch/izamin.c b/kernel/zarch/izamin.c index 4dfa1a9db..412ab15ca 100644 --- a/kernel/zarch/izamin.c +++ b/kernel/zarch/izamin.c @@ -159,7 +159,7 @@ static BLASLONG izamin_kernel_16(BLASLONG n, FLOAT *x, FLOAT *amin) { "2:\n\t" "nop 0" : [iamin] "=r"(iamin),[amin] "=Q"(*amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27"); diff --git a/kernel/zarch/samax.c b/kernel/zarch/samax.c index fdda6dd32..20da4406a 100644 --- a/kernel/zarch/samax.c +++ b/kernel/zarch/samax.c @@ -78,7 +78,7 @@ static FLOAT samax_kernel_64(BLASLONG n, FLOAT *x) { "wfmaxsb %%v0,%%v0,%%v16,8\n\t" "lper %[amax],%%f0" : [amax] "=f"(amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/samin.c b/kernel/zarch/samin.c index f05e851f9..e7e4fd9b7 100644 --- a/kernel/zarch/samin.c +++ b/kernel/zarch/samin.c @@ -78,7 +78,7 @@ static FLOAT samin_kernel_64(BLASLONG n, FLOAT *x) { "wfminsb %%v0,%%v0,%%v16,8\n\t" "lper %[amin],%%f0" : [amin] "=f"(amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/sasum.c b/kernel/zarch/sasum.c index d56f2697b..4cf74f351 100644 --- a/kernel/zarch/sasum.c +++ b/kernel/zarch/sasum.c @@ -108,7 +108,7 @@ static FLOAT sasum_kernel_64(BLASLONG n, FLOAT *x) { "vfasb %%v24,%%v24,%%v25\n\t" "vstef %%v24,%[asum],0" : [asum] "=Q"(asum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/saxpy.c b/kernel/zarch/saxpy.c index ca34a47ff..8bcb1a61b 100644 --- a/kernel/zarch/saxpy.c +++ b/kernel/zarch/saxpy.c @@ -100,8 +100,8 @@ static void saxpy_kernel_64(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) { "vst %%v27,240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x), + : "+m"(*(FLOAT (*)[n]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) x),[x] "a"(x), [alpha] "Q"(*alpha) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/scopy.c b/kernel/zarch/scopy.c index 5c453cfbb..631c9f929 100644 --- a/kernel/zarch/scopy.c +++ b/kernel/zarch/scopy.c @@ -36,8 +36,8 @@ static void scopy_kernel_64(BLASLONG n, FLOAT *x, FLOAT *y) { "la %[x],256(%[x])\n\t" "la %[y],256(%[y])\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n]; } *) y),[x] "+&a"(x),[y] "+&a"(y),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x) + : "=m"(*(FLOAT (*)[n]) y),[x] "+&a"(x),[y] "+&a"(y),[n] "+&r"(n) + : "m"(*(const FLOAT (*)[n]) x) : "cc"); } diff --git a/kernel/zarch/sdot.c b/kernel/zarch/sdot.c index d870b30f0..d27c17162 100644 --- a/kernel/zarch/sdot.c +++ b/kernel/zarch/sdot.c @@ -84,8 +84,8 @@ static FLOAT sdot_kernel_32(BLASLONG n, FLOAT *x, FLOAT *y) { "aebr %%f0,%%f3\n\t" "ler %[dot],%%f0" : [dot] "=f"(dot),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[n]; } *) y),[y] "a"(y) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[n]) y),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/sgemv_n_4.c b/kernel/zarch/sgemv_n_4.c index a0d522b83..b4cfb61de 100644 --- a/kernel/zarch/sgemv_n_4.c +++ b/kernel/zarch/sgemv_n_4.c @@ -160,12 +160,12 @@ static void sgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "brctg %%r0,2b\n\t" "3:\n\t" "nop 0" - : "+m"(*(struct { FLOAT x[n]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[4]; } *) x),[x] "a"(x),[alpha] "Q"(*alpha), + : "+m"(*(FLOAT (*)[n]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[4]) x),[x] "a"(x),[alpha] "Q"(*alpha), [n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", @@ -259,10 +259,10 @@ static void sgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "brctg %%r0,2b\n\t" "3:\n\t" "nop 0" - : "+m"(*(struct { FLOAT x[n]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[2]; } *) x),[x] "a"(x),[alpha] "Q"(*alpha), + : "+m"(*(FLOAT (*)[n]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[2]) x),[x] "a"(x),[alpha] "Q"(*alpha), [n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", @@ -332,8 +332,8 @@ static void sgemv_kernel_4x1(BLASLONG n, FLOAT *a0, FLOAT *x, FLOAT *y, "brctg %%r0,2b\n\t" "3:\n\t" "nop 0" - : "+m"(*(struct { FLOAT x[n]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) a0),[a0] "a"(a0), + : "+m"(*(FLOAT (*)[n]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) a0),[a0] "a"(a0), "m"(*(const FLOAT (*)[1]) x),[x] "a"(x),[alpha] "Q"(*alpha), [n] "r"(n) : "cc", "r0", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", diff --git a/kernel/zarch/sgemv_t_4.c b/kernel/zarch/sgemv_t_4.c index 81e600695..3c708200c 100644 --- a/kernel/zarch/sgemv_t_4.c +++ b/kernel/zarch/sgemv_t_4.c @@ -172,12 +172,12 @@ static void sgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vrepg %%v4,%%v3,1\n\t" "aebr %%f3,%%f4\n\t" "ste %%f3,12(%[y])" - : "=m"(*(struct { FLOAT x[4]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x),[n] "r"(n) + : "=m"(*(FLOAT (*)[4]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[n]) x),[x] "a"(x),[n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -278,10 +278,10 @@ static void sgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vrepg %%v2,%%v1,1\n\t" "aebr %%f1,%%f2\n\t" "ste %%f1,4(%[y])" - : "=m"(*(struct { FLOAT x[2]; } *) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x),[n] "r"(n) + : "=m"(*(FLOAT (*)[2]) y) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n]) x),[x] "a"(x),[n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -357,8 +357,8 @@ static void sgemv_kernel_4x1(BLASLONG n, FLOAT *a0, FLOAT *x, FLOAT *y) { "aebr %%f0,%%f1\n\t" "ste %%f0,0(%[y])" : "=m"(*(FLOAT (*)[1]) y) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n]; } *) a0),[a0] "a"(a0), - "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x),[n] "r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n]) a0),[a0] "a"(a0), + "m"(*(const FLOAT (*)[n]) x),[x] "a"(x),[n] "r"(n) : "cc", "r0", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -432,8 +432,8 @@ static void add_y_kernel_4(BLASLONG n, FLOAT da, FLOAT *src, FLOAT *dest) { "brctg %%r0,2b\n\t" "3:\n\t" "nop 0" - : "+m"(*(struct { FLOAT x[n]; } *) dest) - : [dest] "a"(dest),[da] "Q"(da), "m"(*(const struct { FLOAT x[n]; } *) src), + : "+m"(*(FLOAT (*)[n]) dest) + : [dest] "a"(dest),[da] "Q"(da), "m"(*(const FLOAT (*)[n]) src), [src] "a"(src),[n] "r"(n) : "cc", "r0", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", diff --git a/kernel/zarch/smax.c b/kernel/zarch/smax.c index 7015aaa1d..0c7433cbc 100644 --- a/kernel/zarch/smax.c +++ b/kernel/zarch/smax.c @@ -75,7 +75,7 @@ static FLOAT smax_kernel_64(BLASLONG n, FLOAT *x) { "wfmaxsb %%v0,%%v0,%%v16,0\n\t" "ler %[max],%%f0" : [max] "=f"(max),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT(*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/smin.c b/kernel/zarch/smin.c index b6875c5c6..5e0f3860d 100644 --- a/kernel/zarch/smin.c +++ b/kernel/zarch/smin.c @@ -75,7 +75,7 @@ static FLOAT smin_kernel_64(BLASLONG n, FLOAT *x) { "wfminsb %%v0,%%v0,%%v16,0\n\t" "ler %[min],%%f0" : [min] "=f"(min),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/srot.c b/kernel/zarch/srot.c index 4f471d866..c235adcbe 100644 --- a/kernel/zarch/srot.c +++ b/kernel/zarch/srot.c @@ -169,7 +169,7 @@ static void srot_kernel_64(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *c, FLOAT *s) { "vst %%v23, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) x), "+m"(*(struct { FLOAT x[n]; } *) y), + : "+m"(*(FLOAT (*)[n]) x), "+m"(*(FLOAT (*)[n]) y), [n] "+&r"(n) : [x] "a"(x),[y] "a"(y),[c] "Q"(*c),[s] "Q"(*s) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", diff --git a/kernel/zarch/sscal.c b/kernel/zarch/sscal.c index 9b9930dc8..da2f49eaf 100644 --- a/kernel/zarch/sscal.c +++ b/kernel/zarch/sscal.c @@ -59,7 +59,7 @@ static void sscal_kernel_32(BLASLONG n, FLOAT da, FLOAT *x) { "vst %%v31,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) x),[n] "+&r"(n) + : "+m"(*(FLOAT (*)[n]) x),[n] "+&r"(n) : [x] "a"(x),[da] "Q"(da) : "cc", "r1", "v0", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -81,7 +81,7 @@ static void sscal_kernel_32_zero(BLASLONG n, FLOAT *x) { "vst %%v0,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n]; } *) x),[n] "+&r"(n) + : "=m"(*(FLOAT (*)[n]) x),[n] "+&r"(n) : [x] "a"(x) : "cc", "r1", "v0"); } diff --git a/kernel/zarch/ssum.c b/kernel/zarch/ssum.c index 3f3f46a85..02aabdff6 100644 --- a/kernel/zarch/ssum.c +++ b/kernel/zarch/ssum.c @@ -91,7 +91,7 @@ static FLOAT ssum_kernel_64(BLASLONG n, FLOAT *x) { "vfasb %%v24,%%v24,%%v25\n\t" "vstef %%v24,%[sum],0" : [sum] "=Q"(sum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/sswap.c b/kernel/zarch/sswap.c index 0c62f189d..ec860765a 100644 --- a/kernel/zarch/sswap.c +++ b/kernel/zarch/sswap.c @@ -99,7 +99,7 @@ static void sswap_kernel_64(BLASLONG n, FLOAT *x, FLOAT *y) { "vst %%v31, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n]; } *) x), "+m"(*(struct { FLOAT x[n]; } *) y), + : "+m"(*(FLOAT (*)[n]) x), "+m"(*(FLOAT (*)[n]) y), [n] "+&r"(n) : [x] "a"(x),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", diff --git a/kernel/zarch/zamax.c b/kernel/zarch/zamax.c index aa04ab91f..98e40d073 100644 --- a/kernel/zarch/zamax.c +++ b/kernel/zarch/zamax.c @@ -114,7 +114,7 @@ static FLOAT zamax_kernel_16(BLASLONG n, FLOAT *x) { "wfmaxdb %%v0,%%v0,%%v16,0\n\t" "ldr %[amax],%%f0" : [amax] "=f"(amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/zamax_z13.c b/kernel/zarch/zamax_z13.c index 37278d6db..f727ad67a 100644 --- a/kernel/zarch/zamax_z13.c +++ b/kernel/zarch/zamax_z13.c @@ -123,7 +123,7 @@ static FLOAT zamax_kernel_16(BLASLONG n, FLOAT *x) { "vsel %%v0,%%v0,%%v16,%%v17\n\t" "ldr %[amax],%%f0" : [amax] "=f"(amax),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27"); diff --git a/kernel/zarch/zamin.c b/kernel/zarch/zamin.c index 0b5402853..2e43fefd9 100644 --- a/kernel/zarch/zamin.c +++ b/kernel/zarch/zamin.c @@ -114,7 +114,7 @@ static FLOAT zamin_kernel_16(BLASLONG n, FLOAT *x) { "wfmindb %%v0,%%v0,%%v16,0\n\t" "ldr %[amin],%%f0" : [amin] "=f"(amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/zamin_z13.c b/kernel/zarch/zamin_z13.c index e37bb2236..e52802595 100644 --- a/kernel/zarch/zamin_z13.c +++ b/kernel/zarch/zamin_z13.c @@ -123,7 +123,7 @@ static FLOAT zamin_kernel_16(BLASLONG n, FLOAT *x) { "vsel %%v0,%%v0,%%v16,%%v17\n\t" "ldr %[amin],%%f0" : [amin] "=f"(amin),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27"); diff --git a/kernel/zarch/zasum.c b/kernel/zarch/zasum.c index aeef8d77e..0003f38a5 100644 --- a/kernel/zarch/zasum.c +++ b/kernel/zarch/zasum.c @@ -106,7 +106,7 @@ static FLOAT zasum_kernel_16(BLASLONG n, FLOAT *x) { "vfadb %%v24,%%v24,%%v25\n\t" "vsteg %%v24,%[asum],0" : [asum] "=Q"(asum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/zaxpy.c b/kernel/zarch/zaxpy.c index 9363ec32d..f2c115597 100644 --- a/kernel/zarch/zaxpy.c +++ b/kernel/zarch/zaxpy.c @@ -95,9 +95,9 @@ static void zaxpy_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) { "vst %%v19,112(%%r1,%[y])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/zcopy.c b/kernel/zarch/zcopy.c index 5a46aec1c..d91d9f367 100644 --- a/kernel/zarch/zcopy.c +++ b/kernel/zarch/zcopy.c @@ -36,9 +36,9 @@ static void zcopy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y) { "la %[x],256(%[x])\n\t" "la %[y],256(%[y])\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n * 2]; } *) y),[x] "+&a"(x),[y] "+&a"(y), + : "=m"(*(FLOAT (*)[n * 2]) y),[x] "+&a"(x),[y] "+&a"(y), [n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x) + : "m"(*(const FLOAT (*)[n * 2]) x) : "cc"); } diff --git a/kernel/zarch/zdot.c b/kernel/zarch/zdot.c index ac6e69c23..6b7144101 100644 --- a/kernel/zarch/zdot.c +++ b/kernel/zarch/zdot.c @@ -93,9 +93,9 @@ static void zdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d) { "vsteg %%v24,8(%[d]),1\n\t" "vsteg %%v25,16(%[d]),1\n\t" "vsteg %%v25,24(%[d]),0" - : "=m"(*(struct { FLOAT x[4]; } *) d),[n] "+&r"(n) - : [d] "a"(d), "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[n * 2]; } *) y),[y] "a"(y) + : "=m"(*(FLOAT (*)[4]) d),[n] "+&r"(n) + : [d] "a"(d), "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[n * 2]) y),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/zgemv_n_4.c b/kernel/zarch/zgemv_n_4.c index 13045a359..2ef9b4de8 100644 --- a/kernel/zarch/zgemv_n_4.c +++ b/kernel/zarch/zgemv_n_4.c @@ -112,12 +112,12 @@ static void zgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vst %%v1,16(%%r1,%[y])\n\t" "agfi %%r1,32\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[8]; } *) x),[x] "a"(x) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n * 2]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n * 2]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[8]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -172,10 +172,10 @@ static void zgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) { "vst %%v1,16(%%r1,%[y])\n\t" "agfi %%r1,32\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[4]; } *) x),[x] "a"(x) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[4]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27"); } @@ -210,9 +210,9 @@ static void zgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y) { "vst %%v1,16(%%r1,%[y])\n\t" "agfi %%r1,32\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap),[ap] "a"(ap), - "m"(*(const struct { FLOAT x[2]; } *) x),[x] "a"(x) + : "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap),[ap] "a"(ap), + "m"(*(const FLOAT (*)[2]) x),[x] "a"(x) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21"); } @@ -261,8 +261,8 @@ static void add_y_4(BLASLONG n, FLOAT *src, FLOAT *dest, FLOAT alpha_r, "vst %%v31,48(%%r1,%[dest])\n\t" "agfi %%r1,64\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) dest),[n] "+&r"(n) - : [dest] "a"(dest), "m"(*(const struct { FLOAT x[n * 2]; } *) src), + : "+m"(*(FLOAT (*)[n * 2]) dest),[n] "+&r"(n) + : [dest] "a"(dest), "m"(*(const FLOAT (*)[n * 2]) src), [src] "a"(src),[alpha_r] "Q"(alpha_r),[alpha_i] "Q"(alpha_i) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", diff --git a/kernel/zarch/zgemv_t_4.c b/kernel/zarch/zgemv_t_4.c index 031c31e29..c10769266 100644 --- a/kernel/zarch/zgemv_t_4.c +++ b/kernel/zarch/zgemv_t_4.c @@ -141,13 +141,13 @@ static void zgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "vst %%v27,16(%[y])\n\t" "vst %%v28,32(%[y])\n\t" "vst %%v29,48(%[y])" - : "+m"(*(struct { FLOAT x[8]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap2),[ap2] "a"(ap2), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap3),[ap3] "a"(ap3), - "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[8]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n * 2]) ap2),[ap2] "a"(ap2), + "m"(*(const FLOAT (*)[n * 2]) ap3),[ap3] "a"(ap3), + "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); @@ -229,11 +229,11 @@ static void zgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, "vfmadb %%v23,%%v19,%%v21,%%v23\n\t" "vst %%v22,0(%[y])\n\t" "vst %%v23,16(%[y])\n\t" - : "+m"(*(struct { FLOAT x[4]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap0),[ap0] "a"(ap0), - "m"(*(const struct { FLOAT x[n * 2]; } *) ap1),[ap1] "a"(ap1), - "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[4]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap0),[ap0] "a"(ap0), + "m"(*(const FLOAT (*)[n * 2]) ap1),[ap1] "a"(ap1), + "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); } @@ -294,10 +294,10 @@ static void zgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, "vfmadb %%v0,%%v16,%%v18,%%v0\n\t" "vfmadb %%v0,%%v17,%%v19,%%v0\n\t" "vst %%v0,0(%[y])\n\t" - : "+m"(*(struct { FLOAT x[2]; } *) y),[n] "+&r"(n) - : [y] "a"(y), "m"(*(const struct { FLOAT x[n * 2]; } *) ap),[ap] "a"(ap), - "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x), - "m"(*(const struct { FLOAT x[2]; } *) alpha),[alpha] "a"(alpha) + : "+m"(*(FLOAT (*)[2]) y),[n] "+&r"(n) + : [y] "a"(y), "m"(*(const FLOAT (*)[n * 2]) ap),[ap] "a"(ap), + "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x), + "m"(*(const FLOAT (*)[2]) alpha),[alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19"); } diff --git a/kernel/zarch/zrot.c b/kernel/zarch/zrot.c index 6284d5a47..3b87e356a 100644 --- a/kernel/zarch/zrot.c +++ b/kernel/zarch/zrot.c @@ -169,8 +169,8 @@ static void zrot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *c, FLOAT *s) { "vst %%v23, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x), - "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) + : "+m"(*(FLOAT (*)[n * 2]) x), + "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) : [x] "a"(x),[y] "a"(y),[c] "Q"(*c),[s] "Q"(*s) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", diff --git a/kernel/zarch/zscal.c b/kernel/zarch/zscal.c index e497a6d7b..a5a8f694d 100644 --- a/kernel/zarch/zscal.c +++ b/kernel/zarch/zscal.c @@ -78,8 +78,8 @@ static void zscal_kernel_8(BLASLONG n, FLOAT *alpha, FLOAT *x) { "vst %%v23,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) - : [x] "a"(x), "m"(*(const struct { FLOAT x[2]; } *) alpha), + : "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) + : [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha), [alpha] "a"(alpha) : "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", @@ -128,8 +128,8 @@ static void zscal_kernel_8_zero_r(BLASLONG n, FLOAT *alpha, FLOAT *x) { "vst %%v23,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) - : [x] "a"(x), "m"(*(const struct { FLOAT x[2]; } *) alpha), + : "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) + : [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha), [alpha] "a"(alpha) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); @@ -167,8 +167,8 @@ static void zscal_kernel_8_zero_i(BLASLONG n, FLOAT *alpha, FLOAT *x) { "vst %%v23,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) - : [x] "a"(x), "m"(*(const struct { FLOAT x[2]; } *) alpha), + : "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) + : [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha), [alpha] "a"(alpha) : "cc", "r1", "v0", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); @@ -190,7 +190,7 @@ static void zscal_kernel_8_zero(BLASLONG n, FLOAT *x) { "vst %%v0,112(%%r1,%[x])\n\t" "agfi %%r1,128\n\t" "brctg %[n],0b" - : "=m"(*(struct { FLOAT x[n * 2]; } *) x),[n] "+&r"(n) + : "=m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n) : [x] "a"(x) : "cc", "r1", "v0"); } diff --git a/kernel/zarch/zsum.c b/kernel/zarch/zsum.c index e0f978d87..b35832af8 100644 --- a/kernel/zarch/zsum.c +++ b/kernel/zarch/zsum.c @@ -89,7 +89,7 @@ static FLOAT zsum_kernel_16(BLASLONG n, FLOAT *x) { "vfadb %%v24,%%v24,%%v25\n\t" "vsteg %%v24,%[sum],0" : [sum] "=Q"(sum),[n] "+&r"(n) - : "m"(*(const struct { FLOAT x[n * 2]; } *) x),[x] "a"(x) + : "m"(*(const FLOAT (*)[n * 2]) x),[x] "a"(x) : "cc", "r1", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"); diff --git a/kernel/zarch/zswap.c b/kernel/zarch/zswap.c index bc466866c..7a2d1f882 100644 --- a/kernel/zarch/zswap.c +++ b/kernel/zarch/zswap.c @@ -99,8 +99,8 @@ static void zswap_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y) { "vst %%v31, 240(%%r1,%[y])\n\t" "agfi %%r1,256\n\t" "brctg %[n],0b" - : "+m"(*(struct { FLOAT x[n * 2]; } *) x), - "+m"(*(struct { FLOAT x[n * 2]; } *) y),[n] "+&r"(n) + : "+m"(*(FLOAT (*)[n * 2]) x), + "+m"(*(FLOAT (*)[n * 2]) y),[n] "+&r"(n) : [x] "a"(x),[y] "a"(y) : "cc", "r1", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", From 095f4e6964ba150b1293747d842a60294836be45 Mon Sep 17 00:00:00 2001 From: Marius Hillenbrand Date: Tue, 1 Sep 2020 15:09:32 +0200 Subject: [PATCH 12/13] s390x: allow clang to emit fused multiply-adds (replicates gcc's default behavior) gcc's default setting for floating-point expression contraction is "fast", which allows the compiler to emit fused multiply adds instead of separate multiplies and adds (amongst others). Fused multiply-adds, which assembly kernels typically apply, also bring a significant performance advantage to the C implementation for matrix-matrix multiplication on s390x. To enable that performance advantage for builds with clang, add -ffp-contract=fast to the compiler options. Signed-off-by: Marius Hillenbrand --- Makefile.zarch | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Makefile.zarch b/Makefile.zarch index be1e34f6d..b841d9b4d 100644 --- a/Makefile.zarch +++ b/Makefile.zarch @@ -8,3 +8,9 @@ ifeq ($(CORE), Z14) CCOMMON_OPT += -march=z14 -mzvector -O3 FCOMMON_OPT += -march=z14 -mzvector endif + +# Enable floating-point expression contraction for clang, since it is the +# default for gcc +ifeq ($(C_COMPILER), CLANG) +CCOMMON_OPT += -ffp-contract=fast +endif From 2ee5b899ce9777c63710de1ede75c362db5bcd47 Mon Sep 17 00:00:00 2001 From: Marius Hillenbrand Date: Tue, 1 Sep 2020 16:16:53 +0200 Subject: [PATCH 13/13] s390x: enable S/DGEMM block with explicit loop unrolling + interleaving with clang The code for SGEMM 16x4 and DGEMM 8x4 blocks on z14 and z15 uses explicit unrolling and interleaving to improve performance. The code employs an empty inline asm statement with operands that constrain the compiler's instruction scheduling and thereby enforce proper overlapping of load and compute phases. Fix an ifdef to apply that for clang builds, as well. Signed-off-by: Marius Hillenbrand --- kernel/zarch/gemm_vec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/zarch/gemm_vec.c b/kernel/zarch/gemm_vec.c index b7d7cc04b..ef0b1d1e3 100644 --- a/kernel/zarch/gemm_vec.c +++ b/kernel/zarch/gemm_vec.c @@ -393,7 +393,7 @@ static inline void GEBP_block_16_4( * Note that we need to massage this particular "barrier" * depending on the gcc version. */ -#if __GNUC__ > 7 +#if __GNUC__ > 7 || defined(__clang__) #define BARRIER_READ_BEFORE_COMPUTE(SUFFIX) \ do { \ asm("" \