diff --git a/kernel/x86_64/KERNEL.PILEDRIVER b/kernel/x86_64/KERNEL.PILEDRIVER index 0f5b52616..11c9ad54b 100644 --- a/kernel/x86_64/KERNEL.PILEDRIVER +++ b/kernel/x86_64/KERNEL.PILEDRIVER @@ -2,7 +2,7 @@ SGEMVNKERNEL = sgemv_n.S SGEMVTKERNEL = sgemv_t.S ZGEMVNKERNEL = zgemv_n_dup.S -ZGEMVTKERNEL = zgemv_t_dup.S +ZGEMVTKERNEL = zgemv_t.S DGEMVNKERNEL = dgemv_n_bulldozer.S DGEMVTKERNEL = dgemv_t_bulldozer.S diff --git a/kernel/x86_64/cgemm_kernel_4x2_piledriver.S b/kernel/x86_64/cgemm_kernel_4x2_piledriver.S index 76d6810fa..72deee12f 100644 --- a/kernel/x86_64/cgemm_kernel_4x2_piledriver.S +++ b/kernel/x86_64/cgemm_kernel_4x2_piledriver.S @@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ /********************************************************************* * -* 2013/10/31 Saar +* 2014/06/28 Saar * BLASTEST : OK * CTEST : OK * TEST : OK @@ -546,16 +546,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef WINDOWS_ABI movq %rdi, 48(%rsp) movq %rsi, 56(%rsp) - movups %xmm6, 64(%rsp) - movups %xmm7, 80(%rsp) - movups %xmm8, 96(%rsp) - movups %xmm9, 112(%rsp) - movups %xmm10, 128(%rsp) - movups %xmm11, 144(%rsp) - movups %xmm12, 160(%rsp) - movups %xmm13, 176(%rsp) - movups %xmm14, 192(%rsp) - movups %xmm15, 208(%rsp) + vmovups %xmm6, 64(%rsp) + vmovups %xmm7, 80(%rsp) + vmovups %xmm8, 96(%rsp) + vmovups %xmm9, 112(%rsp) + vmovups %xmm10, 128(%rsp) + vmovups %xmm11, 144(%rsp) + vmovups %xmm12, 160(%rsp) + vmovups %xmm13, 176(%rsp) + vmovups %xmm14, 192(%rsp) + vmovups %xmm15, 208(%rsp) movq ARG1, OLD_M movq ARG2, OLD_N @@ -568,6 +568,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. movsd OLD_OFFSET, %xmm12 #endif vmovaps %xmm3, %xmm0 + vmovsd OLD_ALPHA_I, %xmm1 #else movq STACKSIZE + 8(%rsp), LDC @@ -1889,6 +1890,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .L999: + vzeroupper + movq SP, %rsp movq (%rsp), %rbx movq 8(%rsp), %rbp @@ -1900,16 +1903,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef WINDOWS_ABI movq 48(%rsp), %rdi movq 56(%rsp), %rsi - movups 64(%rsp), %xmm6 - movups 80(%rsp), %xmm7 - movups 96(%rsp), %xmm8 - movups 112(%rsp), %xmm9 - movups 128(%rsp), %xmm10 - movups 144(%rsp), %xmm11 - movups 160(%rsp), %xmm12 - movups 176(%rsp), %xmm13 - movups 192(%rsp), %xmm14 - movups 208(%rsp), %xmm15 + vmovups 64(%rsp), %xmm6 + vmovups 80(%rsp), %xmm7 + vmovups 96(%rsp), %xmm8 + vmovups 112(%rsp), %xmm9 + vmovups 128(%rsp), %xmm10 + vmovups 144(%rsp), %xmm11 + vmovups 160(%rsp), %xmm12 + vmovups 176(%rsp), %xmm13 + vmovups 192(%rsp), %xmm14 + vmovups 208(%rsp), %xmm15 #endif addq $STACKSIZE, %rsp diff --git a/kernel/x86_64/zgemm_kernel_2x2_piledriver.S b/kernel/x86_64/zgemm_kernel_2x2_piledriver.S index b4c416b64..848b6f237 100644 --- a/kernel/x86_64/zgemm_kernel_2x2_piledriver.S +++ b/kernel/x86_64/zgemm_kernel_2x2_piledriver.S @@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /********************************************************************* * -* 2013/10/30 Saar +* 2014/06/28 Saar * BLASTEST : OK * CTEST : OK * TEST : OK @@ -437,16 +437,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef WINDOWS_ABI movq %rdi, 48(%rsp) movq %rsi, 56(%rsp) - movups %xmm6, 64(%rsp) - movups %xmm7, 80(%rsp) - movups %xmm8, 96(%rsp) - movups %xmm9, 112(%rsp) - movups %xmm10, 128(%rsp) - movups %xmm11, 144(%rsp) - movups %xmm12, 160(%rsp) - movups %xmm13, 176(%rsp) - movups %xmm14, 192(%rsp) - movups %xmm15, 208(%rsp) + vmovups %xmm6, 64(%rsp) + vmovups %xmm7, 80(%rsp) + vmovups %xmm8, 96(%rsp) + vmovups %xmm9, 112(%rsp) + vmovups %xmm10, 128(%rsp) + vmovups %xmm11, 144(%rsp) + vmovups %xmm12, 160(%rsp) + vmovups %xmm13, 176(%rsp) + vmovups %xmm14, 192(%rsp) + vmovups %xmm15, 208(%rsp) movq ARG1, OLD_M movq ARG2, OLD_N @@ -456,14 +456,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. movq OLD_C, C movq OLD_LDC, LDC #ifdef TRMMKERNEL - movsd OLD_OFFSET, %xmm12 + vmovsd OLD_OFFSET, %xmm12 #endif vmovaps %xmm3, %xmm0 + vmovsd OLD_ALPHA_I, %xmm1 #else movq STACKSIZE + 8(%rsp), LDC #ifdef TRMMKERNEL - movsd STACKSIZE + 16(%rsp), %xmm12 + vmovsd STACKSIZE + 16(%rsp), %xmm12 #endif #endif @@ -1397,6 +1398,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .L999: + vzeroupper + movq SP, %rsp movq (%rsp), %rbx movq 8(%rsp), %rbp @@ -1408,16 +1411,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef WINDOWS_ABI movq 48(%rsp), %rdi movq 56(%rsp), %rsi - movups 64(%rsp), %xmm6 - movups 80(%rsp), %xmm7 - movups 96(%rsp), %xmm8 - movups 112(%rsp), %xmm9 - movups 128(%rsp), %xmm10 - movups 144(%rsp), %xmm11 - movups 160(%rsp), %xmm12 - movups 176(%rsp), %xmm13 - movups 192(%rsp), %xmm14 - movups 208(%rsp), %xmm15 + vmovups 64(%rsp), %xmm6 + vmovups 80(%rsp), %xmm7 + vmovups 96(%rsp), %xmm8 + vmovups 112(%rsp), %xmm9 + vmovups 128(%rsp), %xmm10 + vmovups 144(%rsp), %xmm11 + vmovups 160(%rsp), %xmm12 + vmovups 176(%rsp), %xmm13 + vmovups 192(%rsp), %xmm14 + vmovups 208(%rsp), %xmm15 #endif addq $STACKSIZE, %rsp