From 73545a79cd8fe3e2b700478c8b9bbffb3face27e Mon Sep 17 00:00:00 2001 From: wernsaar Date: Thu, 19 Jun 2014 14:02:14 +0200 Subject: [PATCH] Ref #380: lowered stack usage for piledriver and bulldozer kernels --- kernel/x86_64/cgemm_kernel_4x2_bulldozer.S | 3 +-- kernel/x86_64/cgemm_kernel_4x2_piledriver.S | 4 +--- kernel/x86_64/dgemm_kernel_8x2_bulldozer.S | 4 ++-- kernel/x86_64/dgemm_kernel_8x2_piledriver.S | 4 ++-- kernel/x86_64/sgemm_kernel_16x2_bulldozer.S | 4 ++-- kernel/x86_64/sgemm_kernel_16x2_piledriver.S | 4 ++-- kernel/x86_64/zgemm_kernel_2x2_bulldozer.S | 4 +--- kernel/x86_64/zgemm_kernel_2x2_piledriver.S | 4 +--- 8 files changed, 12 insertions(+), 19 deletions(-) diff --git a/kernel/x86_64/cgemm_kernel_4x2_bulldozer.S b/kernel/x86_64/cgemm_kernel_4x2_bulldozer.S index 431f25a22..33d3d29dd 100644 --- a/kernel/x86_64/cgemm_kernel_4x2_bulldozer.S +++ b/kernel/x86_64/cgemm_kernel_4x2_bulldozer.S @@ -79,8 +79,7 @@ #endif -#define L_BUFFER_SIZE 512*8*4 -#define LB2_OFFSET 512*8*2 +#define L_BUFFER_SIZE 8192 #define Ndiv6 24(%rsp) #define Nmod6 32(%rsp) diff --git a/kernel/x86_64/cgemm_kernel_4x2_piledriver.S b/kernel/x86_64/cgemm_kernel_4x2_piledriver.S index 931316285..76d6810fa 100644 --- a/kernel/x86_64/cgemm_kernel_4x2_piledriver.S +++ b/kernel/x86_64/cgemm_kernel_4x2_piledriver.S @@ -104,8 +104,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif -#define L_BUFFER_SIZE 512*8*4 -#define LB2_OFFSET 512*8*2 +#define L_BUFFER_SIZE 256*8*4 #define Ndiv6 24(%rsp) #define Nmod6 32(%rsp) @@ -116,7 +115,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define KK 72(%rsp) #define KKK 80(%rsp) #define BUFFER1 128(%rsp) -#define BUFFER2 LB2_OFFSET+128(%rsp) #if defined(OS_WINDOWS) #if L_BUFFER_SIZE > 16384 diff --git a/kernel/x86_64/dgemm_kernel_8x2_bulldozer.S b/kernel/x86_64/dgemm_kernel_8x2_bulldozer.S index 45f5c0c8f..40c5892c6 100644 --- a/kernel/x86_64/dgemm_kernel_8x2_bulldozer.S +++ b/kernel/x86_64/dgemm_kernel_8x2_bulldozer.S @@ -148,8 +148,8 @@ #endif -#define L_BUFFER_SIZE 512*8*4 -#define LB2_OFFSET 512*8*2 +#define L_BUFFER_SIZE 8192 +#define LB2_OFFSET 4096 #define Ndiv6 24(%rsp) #define Nmod6 32(%rsp) diff --git a/kernel/x86_64/dgemm_kernel_8x2_piledriver.S b/kernel/x86_64/dgemm_kernel_8x2_piledriver.S index e09e3b3f5..adc00cca3 100644 --- a/kernel/x86_64/dgemm_kernel_8x2_piledriver.S +++ b/kernel/x86_64/dgemm_kernel_8x2_piledriver.S @@ -105,8 +105,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif -#define L_BUFFER_SIZE 512*8*4 -#define LB2_OFFSET 512*8*2 +#define L_BUFFER_SIZE 8192 +#define LB2_OFFSET 4096 #define Ndiv6 24(%rsp) #define Nmod6 32(%rsp) diff --git a/kernel/x86_64/sgemm_kernel_16x2_bulldozer.S b/kernel/x86_64/sgemm_kernel_16x2_bulldozer.S index 2a034f056..9cc27184d 100644 --- a/kernel/x86_64/sgemm_kernel_16x2_bulldozer.S +++ b/kernel/x86_64/sgemm_kernel_16x2_bulldozer.S @@ -78,8 +78,8 @@ #endif -#define L_BUFFER_SIZE 512*8*4 -#define LB2_OFFSET 512*8*2 +#define L_BUFFER_SIZE 8192 +#define LB2_OFFSET 4096 #define Ndiv6 24(%rsp) #define Nmod6 32(%rsp) diff --git a/kernel/x86_64/sgemm_kernel_16x2_piledriver.S b/kernel/x86_64/sgemm_kernel_16x2_piledriver.S index dcfed6bc5..7c42f1e12 100644 --- a/kernel/x86_64/sgemm_kernel_16x2_piledriver.S +++ b/kernel/x86_64/sgemm_kernel_16x2_piledriver.S @@ -105,8 +105,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif -#define L_BUFFER_SIZE 512*8*4 -#define LB2_OFFSET 512*8*2 +#define L_BUFFER_SIZE 8192 +#define LB2_OFFSET 4096 #define Ndiv6 24(%rsp) #define Nmod6 32(%rsp) diff --git a/kernel/x86_64/zgemm_kernel_2x2_bulldozer.S b/kernel/x86_64/zgemm_kernel_2x2_bulldozer.S index e154d3031..7cf1fda8e 100644 --- a/kernel/x86_64/zgemm_kernel_2x2_bulldozer.S +++ b/kernel/x86_64/zgemm_kernel_2x2_bulldozer.S @@ -79,8 +79,7 @@ #endif -#define L_BUFFER_SIZE 512*8*4 -#define LB2_OFFSET 512*8*2 +#define L_BUFFER_SIZE 8192 #define Ndiv6 24(%rsp) #define Nmod6 32(%rsp) @@ -91,7 +90,6 @@ #define KK 72(%rsp) #define KKK 80(%rsp) #define BUFFER1 128(%rsp) -#define BUFFER2 LB2_OFFSET+128(%rsp) #if defined(OS_WINDOWS) #if L_BUFFER_SIZE > 16384 diff --git a/kernel/x86_64/zgemm_kernel_2x2_piledriver.S b/kernel/x86_64/zgemm_kernel_2x2_piledriver.S index 9f1392d78..b4c416b64 100644 --- a/kernel/x86_64/zgemm_kernel_2x2_piledriver.S +++ b/kernel/x86_64/zgemm_kernel_2x2_piledriver.S @@ -104,8 +104,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif -#define L_BUFFER_SIZE 512*8*4 -#define LB2_OFFSET 512*8*2 +#define L_BUFFER_SIZE 256*8*4 #define Ndiv6 24(%rsp) #define Nmod6 32(%rsp) @@ -116,7 +115,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define KK 72(%rsp) #define KKK 80(%rsp) #define BUFFER1 128(%rsp) -#define BUFFER2 LB2_OFFSET+128(%rsp) #if defined(OS_WINDOWS) #if L_BUFFER_SIZE > 16384