Save and restore input argument 8 (lda4) in sgemv_kernel_4x8
Fixes a miscompilation with GCC 9 when -ftree-vectorize is enabled (related to issue #2009).
This commit is contained in:
parent
69edc5bbe7
commit
46e415b140
|
@ -26,7 +26,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define HAVE_KERNEL_4x8 1
|
#define HAVE_KERNEL_4x8 1
|
||||||
static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLONG lda4, FLOAT *alpha) __attribute__ ((noinline));
|
static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLONG lda4, FLOAT *alpha) __attribute__ ((noinline));
|
||||||
|
|
||||||
|
@ -49,6 +48,8 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
|
||||||
|
|
||||||
"vbroadcastss (%9), %%ymm6 \n\t" // alpha
|
"vbroadcastss (%9), %%ymm6 \n\t" // alpha
|
||||||
|
|
||||||
|
"movq %8, %%xmm10 \n\t" //save lda
|
||||||
|
|
||||||
"testq $0x04, %1 \n\t"
|
"testq $0x04, %1 \n\t"
|
||||||
"jz 2f \n\t"
|
"jz 2f \n\t"
|
||||||
|
|
||||||
|
@ -151,6 +152,7 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
|
||||||
|
|
||||||
"4: \n\t"
|
"4: \n\t"
|
||||||
"vzeroupper \n\t"
|
"vzeroupper \n\t"
|
||||||
|
"movq %%xmm10, %8 \n\t" //restore lda
|
||||||
|
|
||||||
:
|
:
|
||||||
"+r" (i), // 0
|
"+r" (i), // 0
|
||||||
|
@ -170,6 +172,7 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
|
||||||
"%xmm4", "%xmm5",
|
"%xmm4", "%xmm5",
|
||||||
"%xmm6", "%xmm7",
|
"%xmm6", "%xmm7",
|
||||||
"%xmm8", "%xmm9",
|
"%xmm8", "%xmm9",
|
||||||
|
"%xmm10",
|
||||||
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
|
"%xmm12", "%xmm13", "%xmm14", "%xmm15",
|
||||||
"memory"
|
"memory"
|
||||||
);
|
);
|
||||||
|
@ -177,7 +180,6 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#define HAVE_KERNEL_4x4 1
|
#define HAVE_KERNEL_4x4 1
|
||||||
static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT *alpha) __attribute__ ((noinline));
|
static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT *alpha) __attribute__ ((noinline));
|
||||||
|
|
||||||
|
@ -196,6 +198,7 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
|
||||||
|
|
||||||
"vbroadcastss (%8), %%ymm6 \n\t" // alpha
|
"vbroadcastss (%8), %%ymm6 \n\t" // alpha
|
||||||
|
|
||||||
|
|
||||||
"testq $0x04, %1 \n\t"
|
"testq $0x04, %1 \n\t"
|
||||||
"jz 2f \n\t"
|
"jz 2f \n\t"
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue