[ZARCH] fix cgemv_n_4.c

maamountki 2019-01-04 17:45:56 +02:00 committed by GitHub
parent 1aa840a0a2
commit 94cd946b96
1 changed file with 154 additions and 154 deletions
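
The functional fix, applied once in each kernel below, replaces "srlg %%r0,%%r0,1" with "srlg %%r0,%0,1": the loop count is now derived from asm operand %0 (by all appearances the element count n, each kernel's first argument) rather than from whatever unrelated value happened to be in r0. The other changed lines differ only in indentation.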

@@ -34,107 +34,107 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
static void cgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
{
__asm__ volatile (
"vlrepg %%v16,0(%5) \n\t"
"vlrepg %%v17,8(%5) \n\t"
"vlrepg %%v18,16(%5) \n\t"
"vlrepg %%v19,24(%5) \n\t"
"vlrepg %%v16,0(%5) \n\t"
"vlrepg %%v17,8(%5) \n\t"
"vlrepg %%v18,16(%5) \n\t"
"vlrepg %%v19,24(%5) \n\t"
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
"vlef %%v20,4(%5),0 \n\t"
"vlef %%v20,4(%5),2 \n\t"
"vflcsb %%v20,%%v20 \n\t"
"vlef %%v20,0(%5),1 \n\t"
"vlef %%v20,0(%5),3 \n\t"
"vlef %%v20,4(%5),0 \n\t"
"vlef %%v20,4(%5),2 \n\t"
"vflcsb %%v20,%%v20 \n\t"
"vlef %%v20,0(%5),1 \n\t"
"vlef %%v20,0(%5),3 \n\t"
"vlef %%v21,12(%5),0 \n\t"
"vlef %%v21,12(%5),2 \n\t"
"vflcsb %%v21,%%v21 \n\t"
"vlef %%v21,8(%5),1 \n\t"
"vlef %%v21,8(%5),3 \n\t"
"vlef %%v21,12(%5),0 \n\t"
"vlef %%v21,12(%5),2 \n\t"
"vflcsb %%v21,%%v21 \n\t"
"vlef %%v21,8(%5),1 \n\t"
"vlef %%v21,8(%5),3 \n\t"
"vlef %%v22,20(%5),0 \n\t"
"vlef %%v22,20(%5),2 \n\t"
"vflcsb %%v22,%%v22 \n\t"
"vlef %%v22,16(%5),1 \n\t"
"vlef %%v22,16(%5),3 \n\t"
"vlef %%v22,20(%5),0 \n\t"
"vlef %%v22,20(%5),2 \n\t"
"vflcsb %%v22,%%v22 \n\t"
"vlef %%v22,16(%5),1 \n\t"
"vlef %%v22,16(%5),3 \n\t"
"vlef %%v23,28(%5),0 \n\t"
"vlef %%v23,28(%5),2 \n\t"
"vflcsb %%v23,%%v23 \n\t"
"vlef %%v23,24(%5),1 \n\t"
"vlef %%v23,24(%5),3 \n\t"
"vlef %%v23,28(%5),0 \n\t"
"vlef %%v23,28(%5),2 \n\t"
"vflcsb %%v23,%%v23 \n\t"
"vlef %%v23,24(%5),1 \n\t"
"vlef %%v23,24(%5),3 \n\t"
#else
"vlef %%v20,0(%5),1 \n\t"
"vlef %%v20,0(%5),3 \n\t"
"vflcsb %%v20,%%v20 \n\t"
"vlef %%v20,4(%5),0 \n\t"
"vlef %%v20,4(%5),2 \n\t"
"vlef %%v20,0(%5),1 \n\t"
"vlef %%v20,0(%5),3 \n\t"
"vflcsb %%v20,%%v20 \n\t"
"vlef %%v20,4(%5),0 \n\t"
"vlef %%v20,4(%5),2 \n\t"
"vlef %%v21,8(%5),1 \n\t"
"vlef %%v21,8(%5),3 \n\t"
"vflcsb %%v21,%%v21 \n\t"
"vlef %%v21,12(%5),0 \n\t"
"vlef %%v21,12(%5),2 \n\t"
"vlef %%v21,8(%5),1 \n\t"
"vlef %%v21,8(%5),3 \n\t"
"vflcsb %%v21,%%v21 \n\t"
"vlef %%v21,12(%5),0 \n\t"
"vlef %%v21,12(%5),2 \n\t"
"vlef %%v22,16(%5),1 \n\t"
"vlef %%v22,16(%5),3 \n\t"
"vflcsb %%v22,%%v22 \n\t"
"vlef %%v22,20(%5),0 \n\t"
"vlef %%v22,20(%5),2 \n\t"
"vlef %%v22,16(%5),1 \n\t"
"vlef %%v22,16(%5),3 \n\t"
"vflcsb %%v22,%%v22 \n\t"
"vlef %%v22,20(%5),0 \n\t"
"vlef %%v22,20(%5),2 \n\t"
"vlef %%v23,24(%5),1 \n\t"
"vlef %%v23,24(%5),3 \n\t"
"vflcsb %%v23,%%v23 \n\t"
"vlef %%v23,28(%5),0 \n\t"
"vlef %%v23,28(%5),2 \n\t"
"vlef %%v23,24(%5),1 \n\t"
"vlef %%v23,24(%5),3 \n\t"
"vflcsb %%v23,%%v23 \n\t"
"vlef %%v23,28(%5),0 \n\t"
"vlef %%v23,28(%5),2 \n\t"
#endif
"xgr %%r1,%%r1 \n\t"
"srlg %%r0,%%r0,1 \n\t"
"0: \n\t"
"pfd 1,1024(%%r1,%1) \n\t"
"pfd 1,1024(%%r1,%2) \n\t"
"pfd 1,1024(%%r1,%3) \n\t"
"pfd 1,1024(%%r1,%4) \n\t"
"pfd 2,1024(%%r1,%6) \n\t"
"xgr %%r1,%%r1 \n\t"
"srlg %%r0,%0,1 \n\t"
"0: \n\t"
"pfd 1,1024(%%r1,%1) \n\t"
"pfd 1,1024(%%r1,%2) \n\t"
"pfd 1,1024(%%r1,%3) \n\t"
"pfd 1,1024(%%r1,%4) \n\t"
"pfd 2,1024(%%r1,%6) \n\t"
"vlef %%v24,0(%%r1,%1),0 \n\t"
"vlef %%v24,0(%%r1,%1),1 \n\t"
"vlef %%v24,8(%%r1,%1),2 \n\t"
"vlef %%v24,8(%%r1,%1),3 \n\t"
"vlef %%v25,4(%%r1,%1),0 \n\t"
"vlef %%v25,4(%%r1,%1),1 \n\t"
"vlef %%v25,12(%%r1,%1),2 \n\t"
"vlef %%v25,12(%%r1,%1),3 \n\t"
"vlef %%v26,0(%%r1,%2),0 \n\t"
"vlef %%v26,0(%%r1,%2),1 \n\t"
"vlef %%v26,8(%%r1,%2),2 \n\t"
"vlef %%v26,8(%%r1,%2),3 \n\t"
"vlef %%v27,4(%%r1,%2),0 \n\t"
"vlef %%v27,4(%%r1,%2),1 \n\t"
"vlef %%v27,12(%%r1,%2),2 \n\t"
"vlef %%v27,12(%%r1,%2),3 \n\t"
"vlef %%v24,0(%%r1,%1),0 \n\t"
"vlef %%v24,0(%%r1,%1),1 \n\t"
"vlef %%v24,8(%%r1,%1),2 \n\t"
"vlef %%v24,8(%%r1,%1),3 \n\t"
"vlef %%v25,4(%%r1,%1),0 \n\t"
"vlef %%v25,4(%%r1,%1),1 \n\t"
"vlef %%v25,12(%%r1,%1),2 \n\t"
"vlef %%v25,12(%%r1,%1),3 \n\t"
"vlef %%v26,0(%%r1,%2),0 \n\t"
"vlef %%v26,0(%%r1,%2),1 \n\t"
"vlef %%v26,8(%%r1,%2),2 \n\t"
"vlef %%v26,8(%%r1,%2),3 \n\t"
"vlef %%v27,4(%%r1,%2),0 \n\t"
"vlef %%v27,4(%%r1,%2),1 \n\t"
"vlef %%v27,12(%%r1,%2),2 \n\t"
"vlef %%v27,12(%%r1,%2),3 \n\t"
"vl %%v0,0(%%r1,%6) \n\t"
"vfmasb %%v0,%%v24,%%v16,%%v0 \n\t"
"vfmasb %%v0,%%v25,%%v20,%%v0 \n\t"
"vfmasb %%v0,%%v26,%%v17,%%v0 \n\t"
"vfmasb %%v0,%%v27,%%v21,%%v0 \n\t"
"vl %%v0,0(%%r1,%6) \n\t"
"vfmasb %%v0,%%v24,%%v16,%%v0 \n\t"
"vfmasb %%v0,%%v25,%%v20,%%v0 \n\t"
"vfmasb %%v0,%%v26,%%v17,%%v0 \n\t"
"vfmasb %%v0,%%v27,%%v21,%%v0 \n\t"
"vlef %%v28,0(%%r1,%1),0 \n\t"
"vlef %%v28,0(%%r1,%1),1 \n\t"
"vlef %%v28,8(%%r1,%1),2 \n\t"
"vlef %%v28,8(%%r1,%1),3 \n\t"
"vlef %%v29,4(%%r1,%1),0 \n\t"
"vlef %%v29,4(%%r1,%1),1 \n\t"
"vlef %%v29,12(%%r1,%1),2 \n\t"
"vlef %%v29,12(%%r1,%1),3 \n\t"
"vlef %%v30,0(%%r1,%2),0 \n\t"
"vlef %%v30,0(%%r1,%2),1 \n\t"
"vlef %%v30,8(%%r1,%2),2 \n\t"
"vlef %%v30,8(%%r1,%2),3 \n\t"
"vlef %%v31,4(%%r1,%2),0 \n\t"
"vlef %%v31,4(%%r1,%2),1 \n\t"
"vlef %%v31,12(%%r1,%2),2 \n\t"
"vlef %%v31,12(%%r1,%2),3 \n\t"
"vlef %%v28,0(%%r1,%1),0 \n\t"
"vlef %%v28,0(%%r1,%1),1 \n\t"
"vlef %%v28,8(%%r1,%1),2 \n\t"
"vlef %%v28,8(%%r1,%1),3 \n\t"
"vlef %%v29,4(%%r1,%1),0 \n\t"
"vlef %%v29,4(%%r1,%1),1 \n\t"
"vlef %%v29,12(%%r1,%1),2 \n\t"
"vlef %%v29,12(%%r1,%1),3 \n\t"
"vlef %%v30,0(%%r1,%2),0 \n\t"
"vlef %%v30,0(%%r1,%2),1 \n\t"
"vlef %%v30,8(%%r1,%2),2 \n\t"
"vlef %%v30,8(%%r1,%2),3 \n\t"
"vlef %%v31,4(%%r1,%2),0 \n\t"
"vlef %%v31,4(%%r1,%2),1 \n\t"
"vlef %%v31,12(%%r1,%2),2 \n\t"
"vlef %%v31,12(%%r1,%2),3 \n\t"
"vfmasb %%v0,%%v28,%%v18,%%v0 \n\t"
"vfmasb %%v0,%%v29,%%v22,%%v0 \n\t"
@@ -153,56 +153,56 @@ static void cgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
static void cgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
{
__asm__ volatile (
"vlrepg %%v16,0(%3) \n\t"
"vlrepg %%v17,8(%3) \n\t"
"vlrepg %%v16,0(%3) \n\t"
"vlrepg %%v17,8(%3) \n\t"
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
"vlef %%v18,4(%3),0 \n\t"
"vlef %%v18,4(%3),2 \n\t"
"vflcsb %%v18,%%v18 \n\t"
"vlef %%v18,0(%3),1 \n\t"
"vlef %%v18,0(%3),3 \n\t"
"vlef %%v18,4(%3),0 \n\t"
"vlef %%v18,4(%3),2 \n\t"
"vflcsb %%v18,%%v18 \n\t"
"vlef %%v18,0(%3),1 \n\t"
"vlef %%v18,0(%3),3 \n\t"
"vlef %%v19,12(%3),0 \n\t"
"vlef %%v19,12(%3),2 \n\t"
"vflcsb %%v19,%%v19 \n\t"
"vlef %%v19,8(%3),1 \n\t"
"vlef %%v19,8(%3),3 \n\t"
"vlef %%v19,12(%3),0 \n\t"
"vlef %%v19,12(%3),2 \n\t"
"vflcsb %%v19,%%v19 \n\t"
"vlef %%v19,8(%3),1 \n\t"
"vlef %%v19,8(%3),3 \n\t"
#else
"vlef %%v18,0(%3),1 \n\t"
"vlef %%v18,0(%3),3 \n\t"
"vflcsb %%v18,%%v18 \n\t"
"vlef %%v18,4(%3),0 \n\t"
"vlef %%v18,4(%3),2 \n\t"
"vlef %%v18,0(%3),1 \n\t"
"vlef %%v18,0(%3),3 \n\t"
"vflcsb %%v18,%%v18 \n\t"
"vlef %%v18,4(%3),0 \n\t"
"vlef %%v18,4(%3),2 \n\t"
"vlef %%v19,8(%3),1 \n\t"
"vlef %%v19,8(%3),3 \n\t"
"vflcsb %%v19,%%v19 \n\t"
"vlef %%v19,12(%3),0 \n\t"
"vlef %%v19,12(%3),2 \n\t"
"vlef %%v19,8(%3),1 \n\t"
"vlef %%v19,8(%3),3 \n\t"
"vflcsb %%v19,%%v19 \n\t"
"vlef %%v19,12(%3),0 \n\t"
"vlef %%v19,12(%3),2 \n\t"
#endif
"xgr %%r1,%%r1 \n\t"
"srlg %%r0,%%r0,1 \n\t"
"0: \n\t"
"pfd 1,1024(%%r1,%1) \n\t"
"pfd 1,1024(%%r1,%2) \n\t"
"pfd 2,1024(%%r1,%4) \n\t"
"xgr %%r1,%%r1 \n\t"
"srlg %%r0,%0,1 \n\t"
"0: \n\t"
"pfd 1,1024(%%r1,%1) \n\t"
"pfd 1,1024(%%r1,%2) \n\t"
"pfd 2,1024(%%r1,%4) \n\t"
"vlef %%v20,0(%%r1,%1),0 \n\t"
"vlef %%v20,0(%%r1,%1),1 \n\t"
"vlef %%v20,8(%%r1,%1),2 \n\t"
"vlef %%v20,8(%%r1,%1),3 \n\t"
"vlef %%v21,4(%%r1,%1),0 \n\t"
"vlef %%v21,4(%%r1,%1),1 \n\t"
"vlef %%v21,12(%%r1,%1),2 \n\t"
"vlef %%v21,12(%%r1,%1),3 \n\t"
"vlef %%v22,0(%%r1,%2),0 \n\t"
"vlef %%v22,0(%%r1,%2),1 \n\t"
"vlef %%v22,8(%%r1,%2),2 \n\t"
"vlef %%v22,8(%%r1,%2),3 \n\t"
"vlef %%v23,4(%%r1,%2),0 \n\t"
"vlef %%v23,4(%%r1,%2),1 \n\t"
"vlef %%v23,12(%%r1,%2),2 \n\t"
"vlef %%v23,12(%%r1,%2),3 \n\t"
"vlef %%v20,0(%%r1,%1),0 \n\t"
"vlef %%v20,0(%%r1,%1),1 \n\t"
"vlef %%v20,8(%%r1,%1),2 \n\t"
"vlef %%v20,8(%%r1,%1),3 \n\t"
"vlef %%v21,4(%%r1,%1),0 \n\t"
"vlef %%v21,4(%%r1,%1),1 \n\t"
"vlef %%v21,12(%%r1,%1),2 \n\t"
"vlef %%v21,12(%%r1,%1),3 \n\t"
"vlef %%v22,0(%%r1,%2),0 \n\t"
"vlef %%v22,0(%%r1,%2),1 \n\t"
"vlef %%v22,8(%%r1,%2),2 \n\t"
"vlef %%v22,8(%%r1,%2),3 \n\t"
"vlef %%v23,4(%%r1,%2),0 \n\t"
"vlef %%v23,4(%%r1,%2),1 \n\t"
"vlef %%v23,12(%%r1,%2),2 \n\t"
"vlef %%v23,12(%%r1,%2),3 \n\t"
"vl %%v0,0(%%r1,%4) \n\t"
"vfmasb %%v0,%%v20,%%v16,%%v0 \n\t"
@@ -222,34 +222,34 @@ static void cgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y)
static void cgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y)
{
__asm__ volatile (
"vlrepg %%v16,0(%2) \n\t"
"vlrepg %%v16,0(%2) \n\t"
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
"vlef %%v17,4(%2),0 \n\t"
"vlef %%v17,4(%2),2 \n\t"
"vlef %%v17,4(%2),2 \n\t"
"vflcsb %%v17,%%v17 \n\t"
"vlef %%v17,0(%2),1 \n\t"
"vlef %%v17,0(%2),3 \n\t"
"vlef %%v17,0(%2),3 \n\t"
#else
"vlef %%v17,0(%2),1 \n\t"
"vlef %%v17,0(%2),3 \n\t"
"vlef %%v17,0(%2),3 \n\t"
"vflcsb %%v17,%%v17 \n\t"
"vlef %%v17,4(%2),0 \n\t"
"vlef %%v17,4(%2),2 \n\t"
"vlef %%v17,4(%2),2 \n\t"
#endif
"xgr %%r1,%%r1 \n\t"
"srlg %%r0,%%r0,1 \n\t"
"srlg %%r0,%0,1 \n\t"
"0: \n\t"
"pfd 1,1024(%%r1,%1) \n\t"
"pfd 2,1024(%%r1,%3) \n\t"
"vlef %%v18,0(%%r1,%1),0 \n\t"
"vlef %%v18,0(%%r1,%1),1 \n\t"
"vlef %%v18,8(%%r1,%1),2 \n\t"
"vlef %%v18,8(%%r1,%1),3 \n\t"
"vlef %%v19,4(%%r1,%1),0 \n\t"
"vlef %%v19,4(%%r1,%1),1 \n\t"
"vlef %%v19,12(%%r1,%1),2 \n\t"
"vlef %%v19,12(%%r1,%1),3 \n\t"
"vlef %%v18,0(%%r1,%1),0 \n\t"
"vlef %%v18,0(%%r1,%1),1 \n\t"
"vlef %%v18,8(%%r1,%1),2 \n\t"
"vlef %%v18,8(%%r1,%1),3 \n\t"
"vlef %%v19,4(%%r1,%1),0 \n\t"
"vlef %%v19,4(%%r1,%1),1 \n\t"
"vlef %%v19,12(%%r1,%1),2 \n\t"
"vlef %%v19,12(%%r1,%1),3 \n\t"
"vl %%v0,0(%%r1,%3) \n\t"
"vfmasb %%v0,%%v18,%%v16,%%v0 \n\t"
@@ -268,18 +268,18 @@ static void add_y_4(BLASLONG n, FLOAT *src, FLOAT *dest, FLOAT alpha_r, FLOAT al
{
__asm__ volatile (
#if !defined(XCONJ)
"vlrepf %%v0,%3 \n\t"
"vlef %%v1,%4,0 \n\t"
"vlef %%v1,%4,2 \n\t"
"vlrepf %%v0,%3 \n\t"
"vlef %%v1,%4,0 \n\t"
"vlef %%v1,%4,2 \n\t"
"vflcsb %%v1,%%v1 \n\t"
"vlef %%v1,%4,1 \n\t"
"vlef %%v1,%4,1 \n\t"
"vlef %%v1,%4,3 \n\t"
#else
"vlef %%v0,%3,1 \n\t"
"vlef %%v0,%3,3 \n\t"
"vlef %%v0,%3,3 \n\t"
"vflcsb %%v0,%%v0 \n\t"
"vlef %%v0,%3,0 \n\t"
"vlef %%v0,%3,2 \n\t"
"vlef %%v0,%3,2 \n\t"
"vlrepf %%v1,%4 \n\t"
#endif
"xgr %%r1,%%r1 \n\t"
@@ -292,7 +292,7 @@ static void add_y_4(BLASLONG n, FLOAT *src, FLOAT *dest, FLOAT alpha_r, FLOAT al
"vl %%v17,16(%%r1,%1) \n\t"
"vl %%v18,0(%%r1,%2) \n\t"
"vl %%v19,16(%%r1,%2) \n\t"
"verllg %%v20,%%v16,32 \n\t"
"verllg %%v20,%%v16,32 \n\t"
"verllg %%v21,%%v17,32 \n\t"
"vfmasb %%v22,%%v16,%%v0,%%v18 \n\t"