diff --git a/common_power.h b/common_power.h index 052d38828..723d949f2 100644 --- a/common_power.h +++ b/common_power.h @@ -798,7 +798,7 @@ Lmcount$lazy_ptr: #elif defined(PPC440FP2) #define BUFFER_SIZE ( 16 << 20) #elif defined(POWER8) -#define BUFFER_SIZE ( 64 << 20) +#define BUFFER_SIZE ( 32 << 20) #else #define BUFFER_SIZE ( 16 << 20) #endif diff --git a/kernel/power/cgemm_kernel_8x4_power8.S b/kernel/power/cgemm_kernel_8x4_power8.S index 91a48d190..0c462ce8e 100644 --- a/kernel/power/cgemm_kernel_8x4_power8.S +++ b/kernel/power/cgemm_kernel_8x4_power8.S @@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifdef __64BIT__ -#define STACKSIZE 512 +#define STACKSIZE 32000 #define ALPHA_R_SP 296(SP) #define ALPHA_I_SP 304(SP) #define FZERO 312(SP) @@ -136,6 +136,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define alpha_sr vs30 #define alpha_si vs31 +#define FRAMEPOINTER r12 #define BBUFFER r14 #define L r15 @@ -161,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE PROFCODE + mr FRAMEPOINTER, SP + addi SP, SP, -STACKSIZE + addi SP, SP, -STACKSIZE + addi SP, SP, -STACKSIZE addi SP, SP, -STACKSIZE li r0, 0 @@ -233,37 +238,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef linux #ifdef __64BIT__ - ld LDC, FRAMESLOT(0) + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, FRAMESLOT(0) + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) #else #ifdef DOUBLE - lwz B, FRAMESLOT(0) + STACKSIZE(SP) - lwz C, FRAMESLOT(1) + STACKSIZE(SP) - lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + 0(FRAMEPOINTER) + lwz C, FRAMESLOT(1) + 0(FRAMEPOINTER) + lwz LDC, FRAMESLOT(2) + 0(FRAMEPOINTER) #else - lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) #endif #endif #endif #ifdef TRMMKERNEL #if defined(linux) && defined(__64BIT__) - ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) #else #ifdef DOUBLE - lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + 0(FRAMEPOINTER) #else - lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) #endif #endif #endif @@ -290,9 +295,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. li o32 , 32 li o48 , 48 - li T1, 512 - slwi T1, T1, 16 - add BBUFFER, A, T1 + addi BBUFFER, SP, 512+4096 + li T1, -4096 + and BBUFFER, BBUFFER, T1 #ifdef __64BIT__ @@ -392,6 +397,9 @@ L999: #endif addi SP, SP, STACKSIZE + addi SP, SP, STACKSIZE + addi SP, SP, STACKSIZE + addi SP, SP, STACKSIZE blr diff --git a/kernel/power/sgemm_kernel_16x8_power8.S b/kernel/power/sgemm_kernel_16x8_power8.S index 20c94cd94..77f3f7cfb 100644 --- a/kernel/power/sgemm_kernel_16x8_power8.S +++ b/kernel/power/sgemm_kernel_16x8_power8.S @@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #ifdef __64BIT__ -#define STACKSIZE 512 +#define STACKSIZE 32752 #define ALPHA_SP 296(SP) #define FZERO 304(SP) #else @@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define o0 0 +#define FRAMEPOINTER r12 + #define BBUFFER r14 #define o4 r15 #define o12 r16 @@ -160,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. PROLOGUE PROFCODE + mr FRAMEPOINTER, SP + addi SP, SP, -STACKSIZE + addi SP, SP, -STACKSIZE + addi SP, SP, -STACKSIZE addi SP, SP, -STACKSIZE li r0, 0 @@ -231,7 +237,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) #endif #endif @@ -239,17 +245,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(TRMMKERNEL) #if defined(linux) && defined(__64BIT__) - ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER) #else #ifdef DOUBLE - lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) #else - lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER) #endif #endif #endif @@ -271,9 +277,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. li o32, 32 li o48, 48 - li T1, 512 - slwi T1, T1, 16 - add BBUFFER, A, T1 + addi BBUFFER, SP, 512+4096 + li T1, -4096 + and BBUFFER, BBUFFER, T1 addi T1, SP, 300 stxsspx f1, o0 , T1 @@ -355,6 +361,9 @@ L999: #endif addi SP, SP, STACKSIZE + addi SP, SP, STACKSIZE + addi SP, SP, STACKSIZE + addi SP, SP, STACKSIZE blr diff --git a/param.h b/param.h index 84ef7671a..2efd9b2c1 100644 --- a/param.h +++ b/param.h @@ -1964,8 +1964,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SNUMOPT 16 #define DNUMOPT 8 -#define GEMM_DEFAULT_OFFSET_A 131072 -#define GEMM_DEFAULT_OFFSET_B 131072 +#define GEMM_DEFAULT_OFFSET_A 4096 +#define GEMM_DEFAULT_OFFSET_B 4096 #define GEMM_DEFAULT_ALIGN 0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 16 @@ -1987,9 +1987,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CGEMM_DEFAULT_Q 720 #define ZGEMM_DEFAULT_Q 720 -#define SGEMM_DEFAULT_R 14400 +#define SGEMM_DEFAULT_R 21600 #define DGEMM_DEFAULT_R 14400 -#define CGEMM_DEFAULT_R 14400 +#define CGEMM_DEFAULT_R 16200 #define ZGEMM_DEFAULT_R 14400 #define SYMV_P 8