bugfix for cgemm_kernel_8x2_sandy.S

This commit is contained in:
wernsaar 2014-06-28 10:01:56 +02:00
parent 365e8de346
commit 35f668bb14
2 changed files with 24 additions and 21 deletions

View File

@@ -812,16 +812,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef WINDOWS_ABI #ifdef WINDOWS_ABI
movq %rdi, 48(%rsp) movq %rdi, 48(%rsp)
movq %rsi, 56(%rsp) movq %rsi, 56(%rsp)
movups %xmm6, 64(%rsp) vmovups %xmm6, 64(%rsp)
movups %xmm7, 80(%rsp) vmovups %xmm7, 80(%rsp)
movups %xmm8, 96(%rsp) vmovups %xmm8, 96(%rsp)
movups %xmm9, 112(%rsp) vmovups %xmm9, 112(%rsp)
movups %xmm10, 128(%rsp) vmovups %xmm10, 128(%rsp)
movups %xmm11, 144(%rsp) vmovups %xmm11, 144(%rsp)
movups %xmm12, 160(%rsp) vmovups %xmm12, 160(%rsp)
movups %xmm13, 176(%rsp) vmovups %xmm13, 176(%rsp)
movups %xmm14, 192(%rsp) vmovups %xmm14, 192(%rsp)
movups %xmm15, 208(%rsp) vmovups %xmm15, 208(%rsp)
movq ARG1, OLD_M movq ARG1, OLD_M
movq ARG2, OLD_N movq ARG2, OLD_N
@@ -834,6 +834,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
movsd OLD_OFFSET, %xmm12 movsd OLD_OFFSET, %xmm12
#endif #endif
vmovaps %xmm3, %xmm0 vmovaps %xmm3, %xmm0
vmovsd OLD_ALPHA_I, %xmm1
#else #else
movq STACKSIZE + 8(%rsp), LDC movq STACKSIZE + 8(%rsp), LDC
@@ -2249,6 +2250,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.L999: .L999:
vzeroupper
movq SP, %rsp movq SP, %rsp
movq (%rsp), %rbx movq (%rsp), %rbx
movq 8(%rsp), %rbp movq 8(%rsp), %rbp
@@ -2260,16 +2263,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef WINDOWS_ABI #ifdef WINDOWS_ABI
movq 48(%rsp), %rdi movq 48(%rsp), %rdi
movq 56(%rsp), %rsi movq 56(%rsp), %rsi
movups 64(%rsp), %xmm6 vmovups 64(%rsp), %xmm6
movups 80(%rsp), %xmm7 vmovups 80(%rsp), %xmm7
movups 96(%rsp), %xmm8 vmovups 96(%rsp), %xmm8
movups 112(%rsp), %xmm9 vmovups 112(%rsp), %xmm9
movups 128(%rsp), %xmm10 vmovups 128(%rsp), %xmm10
movups 144(%rsp), %xmm11 vmovups 144(%rsp), %xmm11
movups 160(%rsp), %xmm12 vmovups 160(%rsp), %xmm12
movups 176(%rsp), %xmm13 vmovups 176(%rsp), %xmm13
movups 192(%rsp), %xmm14 vmovups 192(%rsp), %xmm14
movups 208(%rsp), %xmm15 vmovups 208(%rsp), %xmm15
#endif #endif
addq $ STACKSIZE, %rsp addq $ STACKSIZE, %rsp

View File

@@ -1,7 +1,7 @@
SHELL = /bin/sh SHELL = /bin/sh
PLAT = _LINUX PLAT = _LINUX
DRVOPTS = $(OPTS) DRVOPTS = $(OPTS)
LOADER = $(FORTRAN) -pthread LOADER = $(FORTRAN)
ARCHFLAGS= -ru ARCHFLAGS= -ru
#RANLIB = ranlib #RANLIB = ranlib