Tag %0 and %1 as both input and output operands

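Extends the fix from #1292 to the other gemv microkernels: operands 0 and 1 (`i` and `n`, or `n1` in one file) are moved from the input operand list to the output operand list and given the `+r` (read-write) constraint.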
This commit is contained in:
Martin Kroeker 2017-12-31 18:03:36 +01:00 committed by GitHub
parent 281a2b952f
commit b973990df2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 66 additions and 66 deletions

View File

@ -158,9 +158,9 @@ static void cgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"
:
:
"r" (i), // 0
"r" (n1), // 1
"+r" (i), // 0
"+r" (n1) // 1
:
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -282,9 +282,9 @@ static void cgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"
:
:
"r" (i), // 0
"r" (n1), // 1
"+r" (i), // 0
"+r" (n1) // 1
:
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -389,9 +389,9 @@ static void cgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"
:
:
"r" (i), // 0
"r" (n1), // 1
"+r" (i), // 0
"+r" (n1) // 1
:
"r" (x), // 2
"r" (y), // 3
"r" (ap), // 4
@ -519,9 +519,9 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT a
"vzeroupper \n\t"
:
:
"r" (i), // 0
"r" (n1), // 1
"+r" (i), // 0
"+r" (n1) // 1
:
"r" (src), // 2
"r" (dest), // 3
"r" (&alpha_r), // 4

View File

@ -231,9 +231,9 @@ static void cgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -392,9 +392,9 @@ static void cgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -521,9 +521,9 @@ static void cgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT *
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap), // 4

View File

@ -149,9 +149,9 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"movups %%xmm5 , -16(%3,%0,8) \n\t" // 2 * y
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4

View File

@ -124,9 +124,9 @@ static void dgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -224,9 +224,9 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4

View File

@ -183,9 +183,9 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
"4: \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -246,9 +246,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"jnz 1b \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4

View File

@ -106,9 +106,9 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
"jnz 1b \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -181,9 +181,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"jnz 1b \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4

View File

@ -196,9 +196,9 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -345,9 +345,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4

View File

@ -126,9 +126,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"vmovss %%xmm7, 12(%3) \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4

View File

@ -78,9 +78,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"movss %%xmm7, 12(%3) \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4

View File

@ -152,9 +152,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4

View File

@ -125,9 +125,9 @@ static void zgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"2: \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -231,9 +231,9 @@ static void zgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"2: \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -321,9 +321,9 @@ static void zgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -393,9 +393,9 @@ static void zgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap) // 4
@ -493,9 +493,9 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT a
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (src), // 2
"r" (dest), // 3
"r" (&alpha_r), // 4

View File

@ -127,9 +127,9 @@ static void zgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -220,9 +220,9 @@ static void zgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -295,9 +295,9 @@ static void zgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap) // 4
@ -396,9 +396,9 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT a
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (src), // 2
"r" (dest), // 3
"r" (&alpha_r), // 4

View File

@ -198,9 +198,9 @@ static void zgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -333,9 +333,9 @@ static void zgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@ -437,9 +437,9 @@ static void zgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT *
"vzeroupper \n\t"
:
"+r" (i), // 0
"+r" (n) // 1
:
"r" (i), // 0
"r" (n), // 1
"r" (x), // 2
"r" (y), // 3
"r" (ap), // 4