From 7ba4fe5afbc6fffdbc6cd2cc6f7ff72050135996 Mon Sep 17 00:00:00 2001 From: Matthew Brandyberry Date: Tue, 21 Jul 2015 12:45:12 -0500 Subject: [PATCH 01/29] ppc64le platform support (ELF ABI v2) --- common_power.h | 31 ++++++++++++++++++ kernel/power/axpy.S | 2 +- kernel/power/axpy_ppc440.S | 2 +- kernel/power/gemm_beta.S | 18 +++++------ kernel/power/gemm_kernel.S | 38 +++++++++++----------- kernel/power/gemm_kernel_altivec.S | 2 +- kernel/power/gemm_kernel_altivec_cell.S | 2 +- kernel/power/gemm_kernel_altivec_g4.S | 2 +- kernel/power/gemm_kernel_cell.S | 38 +++++++++++----------- kernel/power/gemm_kernel_g4.S | 10 +++--- kernel/power/gemm_kernel_power3.S | 30 +++++++++--------- kernel/power/gemm_kernel_power6.S | 10 +++--- kernel/power/gemm_kernel_ppc440.S | 10 +++--- kernel/power/gemv_n.S | 30 +++++++++--------- kernel/power/gemv_n_ppc440.S | 30 +++++++++--------- kernel/power/gemv_t.S | 30 +++++++++--------- kernel/power/gemv_t_ppc440.S | 30 +++++++++--------- kernel/power/ger.S | 30 +++++++++--------- kernel/power/swap.S | 2 +- kernel/power/symv_L.S | 20 ++++++------ kernel/power/symv_U.S | 20 ++++++------ kernel/power/trsm_kernel_LN.S | 38 +++++++++++----------- kernel/power/trsm_kernel_LT.S | 38 +++++++++++----------- kernel/power/trsm_kernel_RT.S | 38 +++++++++++----------- kernel/power/trsm_kernel_cell_LN.S | 38 +++++++++++----------- kernel/power/trsm_kernel_cell_LT.S | 38 +++++++++++----------- kernel/power/trsm_kernel_cell_RT.S | 38 +++++++++++----------- kernel/power/trsm_kernel_power6_LN.S | 10 +++--- kernel/power/trsm_kernel_power6_LT.S | 10 +++--- kernel/power/trsm_kernel_power6_RT.S | 10 +++--- kernel/power/trsm_kernel_ppc440_LN.S | 10 +++--- kernel/power/trsm_kernel_ppc440_LT.S | 10 +++--- kernel/power/trsm_kernel_ppc440_RT.S | 10 +++--- kernel/power/zaxpy.S | 12 +++---- kernel/power/zaxpy_ppc440.S | 12 +++---- kernel/power/zgemm_beta.S | 18 +++++------ kernel/power/zgemm_kernel.S | 40 ++++++++++++------------ kernel/power/zgemm_kernel_altivec.S 
| 32 +++++++++---------- kernel/power/zgemm_kernel_altivec_cell.S | 32 +++++++++---------- kernel/power/zgemm_kernel_altivec_g4.S | 12 +++---- kernel/power/zgemm_kernel_cell.S | 40 ++++++++++++------------ kernel/power/zgemm_kernel_g4.S | 20 ++++++------ kernel/power/zgemm_kernel_power3.S | 32 +++++++++---------- kernel/power/zgemm_kernel_power6.S | 20 ++++++------ kernel/power/zgemm_kernel_ppc440.S | 20 ++++++------ kernel/power/zgemv_n.S | 30 +++++++++--------- kernel/power/zgemv_n_ppc440.S | 40 ++++++++++++------------ kernel/power/zgemv_t.S | 40 ++++++++++++------------ kernel/power/zgemv_t_ppc440.S | 40 ++++++++++++------------ kernel/power/zger.S | 40 ++++++++++++------------ kernel/power/zswap.S | 12 +++---- kernel/power/zsymv_L.S | 30 +++++++++--------- kernel/power/zsymv_U.S | 30 +++++++++--------- kernel/power/ztrsm_kernel_LN.S | 40 ++++++++++++------------ kernel/power/ztrsm_kernel_LT.S | 40 ++++++++++++------------ kernel/power/ztrsm_kernel_RT.S | 40 ++++++++++++------------ kernel/power/ztrsm_kernel_cell_LN.S | 20 ++++++------ kernel/power/ztrsm_kernel_cell_LT.S | 40 ++++++++++++------------ kernel/power/ztrsm_kernel_cell_RT.S | 20 ++++++------ kernel/power/ztrsm_kernel_power6_LN.S | 20 ++++++------ kernel/power/ztrsm_kernel_power6_LT.S | 20 ++++++------ kernel/power/ztrsm_kernel_power6_RT.S | 20 ++++++------ kernel/power/ztrsm_kernel_ppc440_LN.S | 20 ++++++------ kernel/power/ztrsm_kernel_ppc440_LT.S | 20 ++++++------ kernel/power/ztrsm_kernel_ppc440_RT.S | 20 ++++++------ 65 files changed, 789 insertions(+), 758 deletions(-) diff --git a/common_power.h b/common_power.h index f88f527bd..e9b5cb630 100644 --- a/common_power.h +++ b/common_power.h @@ -495,6 +495,15 @@ static inline int blas_quickdivide(blasint x, blasint y){ REALNAME: #define EPILOGUE .size REALNAME, .-REALNAME #else +#if _CALL_ELF == 2 +#define PROLOGUE \ + .section .text;\ + .align 6;\ + .globl REALNAME;\ + .type REALNAME, @function;\ +REALNAME: +#define EPILOGUE .size REALNAME, 
.-REALNAME +#else #define PROLOGUE \ .section .text;\ .align 5;\ @@ -514,6 +523,7 @@ REALNAME:;\ .size .REALNAME, .-.REALNAME; \ .section .note.GNU-stack,"",@progbits #endif +#endif #ifdef PROFILE #ifndef __64BIT__ @@ -792,4 +802,25 @@ Lmcount$lazy_ptr: #ifndef MAP_ANONYMOUS #define MAP_ANONYMOUS MAP_ANON #endif + +#ifdef OS_LINUX +#ifndef __64BIT__ +#define FRAMESLOT(X) (((X) * 4) + 8) +#else +#if _CALL_ELF == 2 +#define FRAMESLOT(X) (((X) * 8) + 96) +#else +#define FRAMESLOT(X) (((X) * 8) + 112) +#endif +#endif +#endif + +#if defined(OS_AIX) || defined(OS_DARWIN) +#ifndef __64BIT__ +#define FRAMESLOT(X) (((X) * 4) + 56) +#else +#define FRAMESLOT(X) (((X) * 8) + 112) +#endif +#endif + #endif diff --git a/kernel/power/axpy.S b/kernel/power/axpy.S index 190f82d6b..fb9789da4 100644 --- a/kernel/power/axpy.S +++ b/kernel/power/axpy.S @@ -106,7 +106,7 @@ stfd f24, 80(SP) #if (defined(_AIX) || defined(__APPLE__)) && !defined(__64BIT__) && defined(DOUBLE) - lwz INCY, 56 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) #endif fmr ALPHA, f1 diff --git a/kernel/power/axpy_ppc440.S b/kernel/power/axpy_ppc440.S index df3f25e5f..81a660e4d 100644 --- a/kernel/power/axpy_ppc440.S +++ b/kernel/power/axpy_ppc440.S @@ -104,7 +104,7 @@ stfd f24, 80(SP) #if (defined(_AIX) || defined(__APPLE__)) && !defined(__64BIT__) && defined(DOUBLE) - lwz INCY, 56 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) #endif fmr ALPHA, f1 diff --git a/kernel/power/gemm_beta.S b/kernel/power/gemm_beta.S index 969f54c61..62d7761ec 100644 --- a/kernel/power/gemm_beta.S +++ b/kernel/power/gemm_beta.S @@ -64,24 +64,24 @@ #ifdef linux #ifndef __64BIT__ - lwz LDC, 8 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #else - ld C, 112 + STACKSIZE(SP) - ld LDC, 120 + STACKSIZE(SP) + ld C, FRAMESLOT(0) + STACKSIZE(SP) + ld LDC, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld C, 112 + STACKSIZE(SP) - ld LDC, 120 + STACKSIZE(SP) + ld C, 
FRAMESLOT(0) + STACKSIZE(SP) + ld LDC, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz C, 56 + STACKSIZE(SP) - lwz LDC, 60 + STACKSIZE(SP) + lwz C, FRAMESLOT(0) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/gemm_kernel.S b/kernel/power/gemm_kernel.S index cae2fabca..e5e9ec346 100644 --- a/kernel/power/gemm_kernel.S +++ b/kernel/power/gemm_kernel.S @@ -179,7 +179,7 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif @@ -187,17 +187,17 @@ #if defined(TRMMKERNEL) #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif @@ -231,29 +231,29 @@ #ifdef linux #ifndef __64BIT__ mr PREA, r10 - lwz PREB, 8 + STACKSIZE(SP) - lwz PREC, 12 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(1) + STACKSIZE(SP) #else - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 60 + 
STACKSIZE(SP) - lwz PREB, 64 + STACKSIZE(SP) - lwz PREC, 68 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz PREA, 56 + STACKSIZE(SP) - lwz PREB, 60 + STACKSIZE(SP) - lwz PREC, 64 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/gemm_kernel_altivec.S b/kernel/power/gemm_kernel_altivec.S index 8a525ef22..6c7e78319 100644 --- a/kernel/power/gemm_kernel_altivec.S +++ b/kernel/power/gemm_kernel_altivec.S @@ -241,7 +241,7 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/gemm_kernel_altivec_cell.S b/kernel/power/gemm_kernel_altivec_cell.S index ac750c2e8..b7445a1f6 100644 --- a/kernel/power/gemm_kernel_altivec_cell.S +++ b/kernel/power/gemm_kernel_altivec_cell.S @@ -247,7 +247,7 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/gemm_kernel_altivec_g4.S b/kernel/power/gemm_kernel_altivec_g4.S index 26339afeb..548150143 100644 --- a/kernel/power/gemm_kernel_altivec_g4.S +++ b/kernel/power/gemm_kernel_altivec_g4.S @@ -241,7 +241,7 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/gemm_kernel_cell.S b/kernel/power/gemm_kernel_cell.S index 1dbacc7f9..f3d3b8325 100644 --- a/kernel/power/gemm_kernel_cell.S +++ b/kernel/power/gemm_kernel_cell.S @@ -185,7 +185,7 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) 
#endif #endif @@ -193,17 +193,17 @@ #if defined(TRMMKERNEL) #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif @@ -229,29 +229,29 @@ #ifdef linux #ifndef __64BIT__ mr PREA, r10 - lwz PREB, 8 + STACKSIZE(SP) - lwz PREC, 12 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(1) + STACKSIZE(SP) #else - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ -xc ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) +xc ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 60 + STACKSIZE(SP) - lwz PREB, 64 + STACKSIZE(SP) - lwz PREC, 68 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz PREA, 56 + STACKSIZE(SP) - lwz PREB, 60 + STACKSIZE(SP) - lwz PREC, 64 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/gemm_kernel_g4.S b/kernel/power/gemm_kernel_g4.S index b6c849965..259f04c4e 100644 --- a/kernel/power/gemm_kernel_g4.S +++ b/kernel/power/gemm_kernel_g4.S @@ -177,7 +177,7 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz 
LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif @@ -185,17 +185,17 @@ #if defined(TRMMKERNEL) #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/gemm_kernel_power3.S b/kernel/power/gemm_kernel_power3.S index 6fe2def67..4a6b5da62 100644 --- a/kernel/power/gemm_kernel_power3.S +++ b/kernel/power/gemm_kernel_power3.S @@ -168,7 +168,7 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif @@ -190,29 +190,29 @@ #ifdef linux #ifndef __64BIT__ mr PREA, r10 - lwz PREB, 8 + STACKSIZE(SP) - lwz PREC, 12 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(1) + STACKSIZE(SP) #else - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 60 + STACKSIZE(SP) - lwz PREB, 64 + STACKSIZE(SP) - lwz PREC, 68 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz PREA, 56 + STACKSIZE(SP) - lwz PREB, 60 + STACKSIZE(SP) - lwz PREC, 64 + 
STACKSIZE(SP) + lwz PREA, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/gemm_kernel_power6.S b/kernel/power/gemm_kernel_power6.S index 5f8fa76ce..1a412c4fb 100644 --- a/kernel/power/gemm_kernel_power6.S +++ b/kernel/power/gemm_kernel_power6.S @@ -176,7 +176,7 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif @@ -184,17 +184,17 @@ #if defined(TRMMKERNEL) #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/gemm_kernel_ppc440.S b/kernel/power/gemm_kernel_ppc440.S index 2e86d5130..b128beb38 100644 --- a/kernel/power/gemm_kernel_ppc440.S +++ b/kernel/power/gemm_kernel_ppc440.S @@ -176,7 +176,7 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif @@ -184,17 +184,17 @@ #if defined(TRMMKERNEL) #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/gemv_n.S b/kernel/power/gemv_n.S index 
2b19f0a4e..77587ecb1 100644 --- a/kernel/power/gemv_n.S +++ b/kernel/power/gemv_n.S @@ -248,31 +248,31 @@ #ifdef linux #ifndef __64BIT__ - lwz INCY, 8 + STACKSIZE(SP) - lwz BUFFER, 12 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #else - ld Y, 112 + STACKSIZE(SP) - ld INCY, 120 + STACKSIZE(SP) - ld BUFFER, 128 + STACKSIZE(SP) + ld Y, FRAMESLOT(0) + STACKSIZE(SP) + ld INCY, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ #ifdef DOUBLE - lwz INCX, 56 + STACKSIZE(SP) - lwz Y, 60 + STACKSIZE(SP) - lwz INCY, 64 + STACKSIZE(SP) - lwz BUFFER, 68 + STACKSIZE(SP) + lwz INCX, FRAMESLOT(0) + STACKSIZE(SP) + lwz Y, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(2) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz Y, 56 + STACKSIZE(SP) - lwz INCY, 60 + STACKSIZE(SP) - lwz BUFFER, 64 + STACKSIZE(SP) + lwz Y, FRAMESLOT(0) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(1) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #else - ld Y, 112 + STACKSIZE(SP) - ld INCY, 120 + STACKSIZE(SP) - ld BUFFER, 128 + STACKSIZE(SP) + ld Y, FRAMESLOT(0) + STACKSIZE(SP) + ld INCY, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/gemv_n_ppc440.S b/kernel/power/gemv_n_ppc440.S index baedebc2b..beb21200a 100644 --- a/kernel/power/gemv_n_ppc440.S +++ b/kernel/power/gemv_n_ppc440.S @@ -201,31 +201,31 @@ #ifdef linux #ifndef __64BIT__ - lwz INCY, 8 + STACKSIZE(SP) - lwz BUFFER, 12 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #else - ld Y, 112 + STACKSIZE(SP) - ld INCY, 120 + STACKSIZE(SP) - ld BUFFER, 128 + STACKSIZE(SP) + ld Y, FRAMESLOT(0) + STACKSIZE(SP) + ld INCY, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ 
#ifdef DOUBLE - lwz INCX, 56 + STACKSIZE(SP) - lwz Y, 60 + STACKSIZE(SP) - lwz INCY, 64 + STACKSIZE(SP) - lwz BUFFER, 68 + STACKSIZE(SP) + lwz INCX, FRAMESLOT(0) + STACKSIZE(SP) + lwz Y, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(2) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz Y, 56 + STACKSIZE(SP) - lwz INCY, 60 + STACKSIZE(SP) - lwz BUFFER, 64 + STACKSIZE(SP) + lwz Y, FRAMESLOT(0) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(1) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #else - ld Y, 112 + STACKSIZE(SP) - ld INCY, 120 + STACKSIZE(SP) - ld BUFFER, 128 + STACKSIZE(SP) + ld Y, FRAMESLOT(0) + STACKSIZE(SP) + ld INCY, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/gemv_t.S b/kernel/power/gemv_t.S index 005e5d56c..817a60b86 100644 --- a/kernel/power/gemv_t.S +++ b/kernel/power/gemv_t.S @@ -257,31 +257,31 @@ #ifdef linux #ifndef __64BIT__ - lwz INCY, 8 + STACKSIZE(SP) - lwz BUFFER, 12 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #else - ld Y, 112 + STACKSIZE(SP) - ld INCY, 120 + STACKSIZE(SP) - ld BUFFER, 128 + STACKSIZE(SP) + ld Y, FRAMESLOT(0) + STACKSIZE(SP) + ld INCY, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ #ifdef DOUBLE - lwz INCX, 56 + STACKSIZE(SP) - lwz Y, 60 + STACKSIZE(SP) - lwz INCY, 64 + STACKSIZE(SP) - lwz BUFFER, 68 + STACKSIZE(SP) + lwz INCX, FRAMESLOT(0) + STACKSIZE(SP) + lwz Y, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(2) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz Y, 56 + STACKSIZE(SP) - lwz INCY, 60 + STACKSIZE(SP) - lwz BUFFER, 64 + STACKSIZE(SP) + lwz Y, FRAMESLOT(0) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(1) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #else - ld Y, 112 + STACKSIZE(SP) - ld INCY, 120 + STACKSIZE(SP) - ld 
BUFFER, 128 + STACKSIZE(SP) + ld Y, FRAMESLOT(0) + STACKSIZE(SP) + ld INCY, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/gemv_t_ppc440.S b/kernel/power/gemv_t_ppc440.S index 62433af19..6e560db6c 100644 --- a/kernel/power/gemv_t_ppc440.S +++ b/kernel/power/gemv_t_ppc440.S @@ -192,31 +192,31 @@ #ifdef linux #ifndef __64BIT__ - lwz INCY, 8 + STACKSIZE(SP) - lwz BUFFER, 12 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #else - ld Y, 112 + STACKSIZE(SP) - ld INCY, 120 + STACKSIZE(SP) - ld BUFFER, 128 + STACKSIZE(SP) + ld Y, FRAMESLOT(0) + STACKSIZE(SP) + ld INCY, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ #ifdef DOUBLE - lwz INCX, 56 + STACKSIZE(SP) - lwz Y, 60 + STACKSIZE(SP) - lwz INCY, 64 + STACKSIZE(SP) - lwz BUFFER, 68 + STACKSIZE(SP) + lwz INCX, FRAMESLOT(0) + STACKSIZE(SP) + lwz Y, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(2) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz Y, 56 + STACKSIZE(SP) - lwz INCY, 60 + STACKSIZE(SP) - lwz BUFFER, 64 + STACKSIZE(SP) + lwz Y, FRAMESLOT(0) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(1) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #else - ld Y, 112 + STACKSIZE(SP) - ld INCY, 120 + STACKSIZE(SP) - ld BUFFER, 128 + STACKSIZE(SP) + ld Y, FRAMESLOT(0) + STACKSIZE(SP) + ld INCY, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/ger.S b/kernel/power/ger.S index bc10bf40d..fd397ce8c 100644 --- a/kernel/power/ger.S +++ b/kernel/power/ger.S @@ -226,31 +226,31 @@ #ifdef linux #ifndef __64BIT__ - lwz LDA, 8 + STACKSIZE(SP) - lwz BUFFER, 12 + STACKSIZE(SP) + lwz LDA, FRAMESLOT(0) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #else - ld A, 112 + STACKSIZE(SP) - ld LDA, 120 + STACKSIZE(SP) - ld BUFFER, 
128 + STACKSIZE(SP) + ld A, FRAMESLOT(0) + STACKSIZE(SP) + ld LDA, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ #ifdef DOUBLE - lwz INCY, 56 + STACKSIZE(SP) - lwz A, 60 + STACKSIZE(SP) - lwz LDA, 64 + STACKSIZE(SP) - lwz BUFFER, 68 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) + lwz A, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDA, FRAMESLOT(2) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz A, 56 + STACKSIZE(SP) - lwz LDA, 60 + STACKSIZE(SP) - lwz BUFFER, 64 + STACKSIZE(SP) + lwz A, FRAMESLOT(0) + STACKSIZE(SP) + lwz LDA, FRAMESLOT(1) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #else - ld A, 112 + STACKSIZE(SP) - ld LDA, 120 + STACKSIZE(SP) - ld BUFFER, 128 + STACKSIZE(SP) + ld A, FRAMESLOT(0) + STACKSIZE(SP) + ld LDA, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/swap.S b/kernel/power/swap.S index f8b56d472..e862b17bb 100644 --- a/kernel/power/swap.S +++ b/kernel/power/swap.S @@ -115,7 +115,7 @@ stfd f31, 136(SP) #if (defined(_AIX) || defined(__APPLE__)) && !defined(__64BIT__) && defined(DOUBLE) - lwz INCY, 56 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) #endif slwi INCX, INCX, BASE_SHIFT diff --git a/kernel/power/symv_L.S b/kernel/power/symv_L.S index fbf735abd..f7d768c50 100644 --- a/kernel/power/symv_L.S +++ b/kernel/power/symv_L.S @@ -250,26 +250,26 @@ #ifdef linux #ifndef __64BIT__ - lwz BUFFER, 56 + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(0) + STACKSIZE(SP) #else - ld INCY, 112 + STACKSIZE(SP) - ld BUFFER, 120 + STACKSIZE(SP) + ld INCY, FRAMESLOT(0) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ #ifdef DOUBLE - lwz Y, 56 + STACKSIZE(SP) - lwz INCY, 60 + STACKSIZE(SP) - lwz BUFFER, 64 + STACKSIZE(SP) + lwz Y, FRAMESLOT(0) + STACKSIZE(SP) + lwz INCY, 
FRAMESLOT(1) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz INCY, 56 + STACKSIZE(SP) - lwz BUFFER, 60 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #endif #else - ld INCY, 112 + STACKSIZE(SP) - ld BUFFER, 120 + STACKSIZE(SP) + ld INCY, FRAMESLOT(0) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/symv_U.S b/kernel/power/symv_U.S index ec1aeea39..d8e082397 100644 --- a/kernel/power/symv_U.S +++ b/kernel/power/symv_U.S @@ -249,26 +249,26 @@ #ifdef linux #ifndef __64BIT__ - lwz BUFFER, 56 + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(0) + STACKSIZE(SP) #else - ld INCY, 112 + STACKSIZE(SP) - ld BUFFER, 120 + STACKSIZE(SP) + ld INCY, FRAMESLOT(0) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ #ifdef DOUBLE - lwz Y, 56 + STACKSIZE(SP) - lwz INCY, 60 + STACKSIZE(SP) - lwz BUFFER, 64 + STACKSIZE(SP) + lwz Y, FRAMESLOT(0) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(1) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz INCY, 56 + STACKSIZE(SP) - lwz BUFFER, 60 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #endif #else - ld INCY, 112 + STACKSIZE(SP) - ld BUFFER, 120 + STACKSIZE(SP) + ld INCY, FRAMESLOT(0) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/trsm_kernel_LN.S b/kernel/power/trsm_kernel_LN.S index 0c13a25a4..7983c573b 100644 --- a/kernel/power/trsm_kernel_LN.S +++ b/kernel/power/trsm_kernel_LN.S @@ -174,24 +174,24 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif slwi LDC, LDC, BASE_SHIFT #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || 
defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif @@ -239,29 +239,29 @@ #ifdef linux #ifndef __64BIT__ mr PREA, r10 - lwz PREB, 8 + STACKSIZE(SP) - lwz PREC, 12 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(1) + STACKSIZE(SP) #else - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 60 + STACKSIZE(SP) - lwz PREB, 64 + STACKSIZE(SP) - lwz PREC, 68 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz PREA, 56 + STACKSIZE(SP) - lwz PREB, 60 + STACKSIZE(SP) - lwz PREC, 64 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/trsm_kernel_LT.S b/kernel/power/trsm_kernel_LT.S index 06481e5e9..c561fd014 100644 --- a/kernel/power/trsm_kernel_LT.S +++ b/kernel/power/trsm_kernel_LT.S @@ -174,24 +174,24 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif slwi LDC, LDC, BASE_SHIFT #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + 
STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif @@ -260,29 +260,29 @@ #ifdef linux #ifndef __64BIT__ mr PREA, r10 - lwz PREB, 8 + STACKSIZE(SP) - lwz PREC, 12 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(1) + STACKSIZE(SP) #else - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 60 + STACKSIZE(SP) - lwz PREB, 64 + STACKSIZE(SP) - lwz PREC, 68 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz PREA, 56 + STACKSIZE(SP) - lwz PREB, 60 + STACKSIZE(SP) - lwz PREC, 64 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/trsm_kernel_RT.S b/kernel/power/trsm_kernel_RT.S index 1777ba86d..07b88402c 100644 --- a/kernel/power/trsm_kernel_RT.S +++ b/kernel/power/trsm_kernel_RT.S @@ -174,24 +174,24 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif slwi LDC, LDC, BASE_SHIFT #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + 
STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif @@ -257,29 +257,29 @@ #ifdef linux #ifndef __64BIT__ mr PREA, r10 - lwz PREB, 8 + STACKSIZE(SP) - lwz PREC, 12 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(1) + STACKSIZE(SP) #else - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 60 + STACKSIZE(SP) - lwz PREB, 64 + STACKSIZE(SP) - lwz PREC, 68 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz PREA, 56 + STACKSIZE(SP) - lwz PREB, 60 + STACKSIZE(SP) - lwz PREC, 64 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/trsm_kernel_cell_LN.S b/kernel/power/trsm_kernel_cell_LN.S index b5ed925ed..803530cbb 100644 --- a/kernel/power/trsm_kernel_cell_LN.S +++ b/kernel/power/trsm_kernel_cell_LN.S @@ -174,24 +174,24 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif slwi LDC, LDC, 
BASE_SHIFT #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif @@ -234,29 +234,29 @@ #ifdef linux #ifndef __64BIT__ mr PREA, r10 - lwz PREB, 8 + STACKSIZE(SP) - lwz PREC, 12 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(1) + STACKSIZE(SP) #else - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 60 + STACKSIZE(SP) - lwz PREB, 64 + STACKSIZE(SP) - lwz PREC, 68 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz PREA, 56 + STACKSIZE(SP) - lwz PREB, 60 + STACKSIZE(SP) - lwz PREC, 64 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/trsm_kernel_cell_LT.S b/kernel/power/trsm_kernel_cell_LT.S index cdc6f7514..105e7d43c 100644 --- a/kernel/power/trsm_kernel_cell_LT.S +++ b/kernel/power/trsm_kernel_cell_LT.S @@ -174,24 +174,24 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz 
LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif slwi LDC, LDC, BASE_SHIFT #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif @@ -260,29 +260,29 @@ #ifdef linux #ifndef __64BIT__ mr PREA, r10 - lwz PREB, 8 + STACKSIZE(SP) - lwz PREC, 12 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(1) + STACKSIZE(SP) #else - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 60 + STACKSIZE(SP) - lwz PREB, 64 + STACKSIZE(SP) - lwz PREC, 68 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz PREA, 56 + STACKSIZE(SP) - lwz PREB, 60 + STACKSIZE(SP) - lwz PREC, 64 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/trsm_kernel_cell_RT.S b/kernel/power/trsm_kernel_cell_RT.S index 731f52c19..a54a261cb 100644 --- a/kernel/power/trsm_kernel_cell_RT.S +++ b/kernel/power/trsm_kernel_cell_RT.S @@ -174,24 +174,24 @@ #if defined(_AIX) || defined(__APPLE__) #if 
!defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif slwi LDC, LDC, BASE_SHIFT #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif @@ -234,29 +234,29 @@ #ifdef linux #ifndef __64BIT__ mr PREA, r10 - lwz PREB, 8 + STACKSIZE(SP) - lwz PREC, 12 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(1) + STACKSIZE(SP) #else - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 112 + STACKSIZE(SP) - ld PREB, 120 + STACKSIZE(SP) - ld PREC, 128 + STACKSIZE(SP) + ld PREA, FRAMESLOT(0) + STACKSIZE(SP) + ld PREB, FRAMESLOT(1) + STACKSIZE(SP) + ld PREC, FRAMESLOT(2) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 60 + STACKSIZE(SP) - lwz PREB, 64 + STACKSIZE(SP) - lwz PREC, 68 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz PREA, 56 + STACKSIZE(SP) - lwz PREB, 60 + STACKSIZE(SP) - lwz PREC, 64 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(0) + STACKSIZE(SP) + lwz PREB, FRAMESLOT(1) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/trsm_kernel_power6_LN.S b/kernel/power/trsm_kernel_power6_LN.S index 2f85cd14b..937a6761a 100644 --- a/kernel/power/trsm_kernel_power6_LN.S +++ 
b/kernel/power/trsm_kernel_power6_LN.S @@ -173,24 +173,24 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif slwi LDC, LDC, BASE_SHIFT #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/trsm_kernel_power6_LT.S b/kernel/power/trsm_kernel_power6_LT.S index 6b3d21b14..924f00ec0 100644 --- a/kernel/power/trsm_kernel_power6_LT.S +++ b/kernel/power/trsm_kernel_power6_LT.S @@ -174,24 +174,24 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif slwi LDC, LDC, BASE_SHIFT #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/trsm_kernel_power6_RT.S b/kernel/power/trsm_kernel_power6_RT.S index f6b2e5cfb..40ee5e28d 100644 --- a/kernel/power/trsm_kernel_power6_RT.S +++ b/kernel/power/trsm_kernel_power6_RT.S @@ -173,24 +173,24 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif slwi LDC, LDC, 
BASE_SHIFT #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/trsm_kernel_ppc440_LN.S b/kernel/power/trsm_kernel_ppc440_LN.S index 265e79e0f..6b7312101 100644 --- a/kernel/power/trsm_kernel_ppc440_LN.S +++ b/kernel/power/trsm_kernel_ppc440_LN.S @@ -185,24 +185,24 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif slwi LDC, LDC, BASE_SHIFT #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/trsm_kernel_ppc440_LT.S b/kernel/power/trsm_kernel_ppc440_LT.S index de7ff7415..28b109b96 100644 --- a/kernel/power/trsm_kernel_ppc440_LT.S +++ b/kernel/power/trsm_kernel_ppc440_LT.S @@ -170,24 +170,24 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif slwi LDC, LDC, BASE_SHIFT #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, 
FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/trsm_kernel_ppc440_RT.S b/kernel/power/trsm_kernel_ppc440_RT.S index e8d202d97..df80cd393 100644 --- a/kernel/power/trsm_kernel_ppc440_RT.S +++ b/kernel/power/trsm_kernel_ppc440_RT.S @@ -185,24 +185,24 @@ #if defined(_AIX) || defined(__APPLE__) #if !defined(__64BIT__) && defined(DOUBLE) - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif slwi LDC, LDC, BASE_SHIFT #if defined(linux) && defined(__64BIT__) - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 112 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else - lwz OFFSET, 56 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zaxpy.S b/kernel/power/zaxpy.S index 1acd729ae..ac5b249bb 100644 --- a/kernel/power/zaxpy.S +++ b/kernel/power/zaxpy.S @@ -124,19 +124,19 @@ stfd f25, 88(SP) #if defined(linux) && defined(__64BIT__) - ld INCY, 112 + STACKSIZE(SP) + ld INCY, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld INCY, 112 + STACKSIZE(SP) + ld INCY, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz INCX, 56 + STACKSIZE(SP) - lwz Y, 60 + STACKSIZE(SP) - lwz INCY, 64 + STACKSIZE(SP) + lwz INCX, FRAMESLOT(0) + STACKSIZE(SP) + lwz Y, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz INCY, 56 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zaxpy_ppc440.S b/kernel/power/zaxpy_ppc440.S index 1ac232444..b5c604e91 100644 --- 
a/kernel/power/zaxpy_ppc440.S +++ b/kernel/power/zaxpy_ppc440.S @@ -113,19 +113,19 @@ stfd f25, 88(SP) #if defined(linux) && defined(__64BIT__) - ld INCY, 112 + STACKSIZE(SP) + ld INCY, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld INCY, 112 + STACKSIZE(SP) + ld INCY, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz INCX, 56 + STACKSIZE(SP) - lwz Y, 60 + STACKSIZE(SP) - lwz INCY, 64 + STACKSIZE(SP) + lwz INCX, FRAMESLOT(0) + STACKSIZE(SP) + lwz Y, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz INCY, 56 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zgemm_beta.S b/kernel/power/zgemm_beta.S index 4a9cbd8bb..43b72ca15 100644 --- a/kernel/power/zgemm_beta.S +++ b/kernel/power/zgemm_beta.S @@ -64,24 +64,24 @@ #ifdef linux #ifndef __64BIT__ - lwz LDC, 8 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #else - ld C, 120 + STACKSIZE(SP) - ld LDC, 128 + STACKSIZE(SP) + ld C, FRAMESLOT(1) + STACKSIZE(SP) + ld LDC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld C, 120 + STACKSIZE(SP) - ld LDC, 128 + STACKSIZE(SP) + ld C, FRAMESLOT(1) + STACKSIZE(SP) + ld LDC, FRAMESLOT(2) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz C, 68 + STACKSIZE(SP) - lwz LDC, 72 + STACKSIZE(SP) + lwz C, FRAMESLOT(3) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(4) + STACKSIZE(SP) #else - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zgemm_kernel.S b/kernel/power/zgemm_kernel.S index 3d6689531..8ec8b674a 100644 --- a/kernel/power/zgemm_kernel.S +++ b/kernel/power/zgemm_kernel.S @@ -171,37 +171,37 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld 
LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #ifdef TRMMKERNEL #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif @@ -233,25 +233,25 @@ #ifdef linux #ifndef __64BIT__ - lwz PREA, 16 + STACKSIZE(SP) - lwz PREC, 20 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 72 + STACKSIZE(SP) - lwz PREC, 76 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(4) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(5) + STACKSIZE(SP) #else - lwz PREA, 68 + STACKSIZE(SP) - lwz PREC, 72 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(3) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zgemm_kernel_altivec.S b/kernel/power/zgemm_kernel_altivec.S index 2267e975a..2b650cd02 100644 --- a/kernel/power/zgemm_kernel_altivec.S +++ b/kernel/power/zgemm_kernel_altivec.S @@ -240,20 +240,20 @@ #ifdef linux #ifdef 
__64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif @@ -266,25 +266,25 @@ #ifdef linux #ifndef __64BIT__ - lwz PREB, 16 + STACKSIZE(SP) - lwz PREC, 20 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - ld PREB, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREB, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREB, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREB, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREB, 72 + STACKSIZE(SP) - lwz PREC, 76 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(4) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(5) + STACKSIZE(SP) #else - lwz PREB, 68 + STACKSIZE(SP) - lwz PREC, 72 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(3) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zgemm_kernel_altivec_cell.S b/kernel/power/zgemm_kernel_altivec_cell.S index 9a1407d6e..642d1f2e7 100644 --- a/kernel/power/zgemm_kernel_altivec_cell.S +++ b/kernel/power/zgemm_kernel_altivec_cell.S @@ -246,20 +246,20 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz 
LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif @@ -272,25 +272,25 @@ #ifdef linux #ifndef __64BIT__ - lwz PREB, 16 + STACKSIZE(SP) - lwz PREC, 20 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - ld PREB, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREB, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREB, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREB, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREB, 72 + STACKSIZE(SP) - lwz PREC, 76 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(4) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(5) + STACKSIZE(SP) #else - lwz PREB, 68 + STACKSIZE(SP) - lwz PREC, 72 + STACKSIZE(SP) + lwz PREB, FRAMESLOT(3) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zgemm_kernel_altivec_g4.S b/kernel/power/zgemm_kernel_altivec_g4.S index 4c774a1e3..0f7a6f9aa 100644 --- a/kernel/power/zgemm_kernel_altivec_g4.S +++ b/kernel/power/zgemm_kernel_altivec_g4.S @@ -240,20 +240,20 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zgemm_kernel_cell.S 
b/kernel/power/zgemm_kernel_cell.S index 5667b130d..8fd6b0afb 100644 --- a/kernel/power/zgemm_kernel_cell.S +++ b/kernel/power/zgemm_kernel_cell.S @@ -177,37 +177,37 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #ifdef TRMMKERNEL #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif @@ -232,25 +232,25 @@ #ifdef linux #ifndef __64BIT__ - lwz PREA, 16 + STACKSIZE(SP) - lwz PREC, 20 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 72 + STACKSIZE(SP) - lwz PREC, 76 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(4) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(5) + STACKSIZE(SP) #else - lwz PREA, 68 + STACKSIZE(SP) - lwz PREC, 72 + STACKSIZE(SP) + lwz 
PREA, FRAMESLOT(3) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zgemm_kernel_g4.S b/kernel/power/zgemm_kernel_g4.S index af6f88e99..bf6bf77e8 100644 --- a/kernel/power/zgemm_kernel_g4.S +++ b/kernel/power/zgemm_kernel_g4.S @@ -187,37 +187,37 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #ifdef TRMMKERNEL #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zgemm_kernel_power3.S b/kernel/power/zgemm_kernel_power3.S index d7d6e2aea..471d3b9ae 100644 --- a/kernel/power/zgemm_kernel_power3.S +++ b/kernel/power/zgemm_kernel_power3.S @@ -163,20 +163,20 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, 
FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif @@ -204,25 +204,25 @@ #ifdef linux #ifndef __64BIT__ - lwz PREA, 16 + STACKSIZE(SP) - lwz PREC, 20 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 72 + STACKSIZE(SP) - lwz PREC, 76 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(4) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(5) + STACKSIZE(SP) #else - lwz PREA, 68 + STACKSIZE(SP) - lwz PREC, 72 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(3) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zgemm_kernel_power6.S b/kernel/power/zgemm_kernel_power6.S index 3f79c0523..3c28649bc 100644 --- a/kernel/power/zgemm_kernel_power6.S +++ b/kernel/power/zgemm_kernel_power6.S @@ -201,37 +201,37 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #ifdef TRMMKERNEL #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - 
ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zgemm_kernel_ppc440.S b/kernel/power/zgemm_kernel_ppc440.S index 075fa2b4f..748b69a0c 100644 --- a/kernel/power/zgemm_kernel_ppc440.S +++ b/kernel/power/zgemm_kernel_ppc440.S @@ -184,37 +184,37 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #ifdef TRMMKERNEL #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zgemv_n.S b/kernel/power/zgemv_n.S index ba4685dec..23e0177c0 100644 --- a/kernel/power/zgemv_n.S +++ b/kernel/power/zgemv_n.S @@ -247,31 +247,31 @@ #ifdef linux #ifndef __64BIT__ - lwz INCY, 8 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) #else - ld INCX, 112 + STACKSIZE(SP) - ld Y, 120 + STACKSIZE(SP) - ld INCY, 128 + STACKSIZE(SP) + ld INCX, FRAMESLOT(0) + STACKSIZE(SP) + ld Y, FRAMESLOT(1) + STACKSIZE(SP) + ld INCY, FRAMESLOT(2) 
+ STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ #ifdef DOUBLE - lwz LDA, 56 + STACKSIZE(SP) - lwz X, 60 + STACKSIZE(SP) - lwz INCX, 64 + STACKSIZE(SP) - lwz Y, 68 + STACKSIZE(SP) - lwz INCY, 72 + STACKSIZE(SP) + lwz LDA, FRAMESLOT(0) + STACKSIZE(SP) + lwz X, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCX, FRAMESLOT(2) + STACKSIZE(SP) + lwz Y, FRAMESLOT(3) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(4) + STACKSIZE(SP) #else - lwz INCX, 56 + STACKSIZE(SP) - lwz Y, 60 + STACKSIZE(SP) - lwz INCY, 64 + STACKSIZE(SP) + lwz INCX, FRAMESLOT(0) + STACKSIZE(SP) + lwz Y, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(2) + STACKSIZE(SP) #endif #else - ld INCX, 112 + STACKSIZE(SP) - ld Y, 120 + STACKSIZE(SP) - ld INCY, 128 + STACKSIZE(SP) + ld INCX, FRAMESLOT(0) + STACKSIZE(SP) + ld Y, FRAMESLOT(1) + STACKSIZE(SP) + ld INCY, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/zgemv_n_ppc440.S b/kernel/power/zgemv_n_ppc440.S index 31e720261..55dd2d84f 100644 --- a/kernel/power/zgemv_n_ppc440.S +++ b/kernel/power/zgemv_n_ppc440.S @@ -225,36 +225,36 @@ #ifdef linux #ifndef __64BIT__ - lwz INCY, 8 + STACKSIZE(SP) - lwz BUFFER, 12 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #else - ld INCX, 112 + STACKSIZE(SP) - ld Y, 120 + STACKSIZE(SP) - ld INCY, 128 + STACKSIZE(SP) - ld BUFFER, 136 + STACKSIZE(SP) + ld INCX, FRAMESLOT(0) + STACKSIZE(SP) + ld Y, FRAMESLOT(1) + STACKSIZE(SP) + ld INCY, FRAMESLOT(2) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ #ifdef DOUBLE - lwz LDA, 56 + STACKSIZE(SP) - lwz X, 60 + STACKSIZE(SP) - lwz INCX, 64 + STACKSIZE(SP) - lwz Y, 68 + STACKSIZE(SP) - lwz INCY, 72 + STACKSIZE(SP) - lwz BUFFER, 76 + STACKSIZE(SP) + lwz LDA, FRAMESLOT(0) + STACKSIZE(SP) + lwz X, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCX, FRAMESLOT(2) + STACKSIZE(SP) + lwz Y, FRAMESLOT(3) + STACKSIZE(SP) + lwz 
INCY, FRAMESLOT(4) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(5) + STACKSIZE(SP) #else - lwz INCX, 56 + STACKSIZE(SP) - lwz Y, 60 + STACKSIZE(SP) - lwz INCY, 64 + STACKSIZE(SP) - lwz BUFFER, 68 + STACKSIZE(SP) + lwz INCX, FRAMESLOT(0) + STACKSIZE(SP) + lwz Y, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(2) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #endif #else - ld INCX, 112 + STACKSIZE(SP) - ld Y, 120 + STACKSIZE(SP) - ld INCY, 128 + STACKSIZE(SP) - ld BUFFER, 136 + STACKSIZE(SP) + ld INCX, FRAMESLOT(0) + STACKSIZE(SP) + ld Y, FRAMESLOT(1) + STACKSIZE(SP) + ld INCY, FRAMESLOT(2) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/zgemv_t.S b/kernel/power/zgemv_t.S index bd8ac4043..c0bad3152 100644 --- a/kernel/power/zgemv_t.S +++ b/kernel/power/zgemv_t.S @@ -222,36 +222,36 @@ #ifdef linux #ifndef __64BIT__ - lwz INCY, 8 + STACKSIZE(SP) - lwz BUFFER, 12 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #else - ld INCX, 112 + STACKSIZE(SP) - ld Y, 120 + STACKSIZE(SP) - ld INCY, 128 + STACKSIZE(SP) - ld BUFFER, 136 + STACKSIZE(SP) + ld INCX, FRAMESLOT(0) + STACKSIZE(SP) + ld Y, FRAMESLOT(1) + STACKSIZE(SP) + ld INCY, FRAMESLOT(2) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ #ifdef DOUBLE - lwz LDA, 56 + STACKSIZE(SP) - lwz X, 60 + STACKSIZE(SP) - lwz INCX, 64 + STACKSIZE(SP) - lwz Y, 68 + STACKSIZE(SP) - lwz INCY, 72 + STACKSIZE(SP) - lwz BUFFER, 76 + STACKSIZE(SP) + lwz LDA, FRAMESLOT(0) + STACKSIZE(SP) + lwz X, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCX, FRAMESLOT(2) + STACKSIZE(SP) + lwz Y, FRAMESLOT(3) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(4) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(5) + STACKSIZE(SP) #else - lwz INCX, 56 + STACKSIZE(SP) - lwz Y, 60 + STACKSIZE(SP) - lwz INCY, 64 + STACKSIZE(SP) - lwz BUFFER, 68 + STACKSIZE(SP) + lwz INCX, FRAMESLOT(0) + STACKSIZE(SP) + 
lwz Y, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(2) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #endif #else - ld INCX, 112 + STACKSIZE(SP) - ld Y, 120 + STACKSIZE(SP) - ld INCY, 128 + STACKSIZE(SP) - ld BUFFER, 136 + STACKSIZE(SP) + ld INCX, FRAMESLOT(0) + STACKSIZE(SP) + ld Y, FRAMESLOT(1) + STACKSIZE(SP) + ld INCY, FRAMESLOT(2) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/zgemv_t_ppc440.S b/kernel/power/zgemv_t_ppc440.S index 043b9e37b..bfc039a0c 100644 --- a/kernel/power/zgemv_t_ppc440.S +++ b/kernel/power/zgemv_t_ppc440.S @@ -181,36 +181,36 @@ #ifdef linux #ifndef __64BIT__ - lwz INCY, 8 + STACKSIZE(SP) - lwz BUFFER, 12 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #else - ld INCX, 112 + STACKSIZE(SP) - ld Y, 120 + STACKSIZE(SP) - ld INCY, 128 + STACKSIZE(SP) - ld BUFFER, 136 + STACKSIZE(SP) + ld INCX, FRAMESLOT(0) + STACKSIZE(SP) + ld Y, FRAMESLOT(1) + STACKSIZE(SP) + ld INCY, FRAMESLOT(2) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ #ifdef DOUBLE - lwz LDA, 56 + STACKSIZE(SP) - lwz X, 60 + STACKSIZE(SP) - lwz INCX, 64 + STACKSIZE(SP) - lwz Y, 68 + STACKSIZE(SP) - lwz INCY, 72 + STACKSIZE(SP) - lwz BUFFER, 76 + STACKSIZE(SP) + lwz LDA, FRAMESLOT(0) + STACKSIZE(SP) + lwz X, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCX, FRAMESLOT(2) + STACKSIZE(SP) + lwz Y, FRAMESLOT(3) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(4) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(5) + STACKSIZE(SP) #else - lwz INCX, 56 + STACKSIZE(SP) - lwz Y, 60 + STACKSIZE(SP) - lwz INCY, 64 + STACKSIZE(SP) - lwz BUFFER, 68 + STACKSIZE(SP) + lwz INCX, FRAMESLOT(0) + STACKSIZE(SP) + lwz Y, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(2) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #endif #else - ld INCX, 112 + STACKSIZE(SP) - ld Y, 120 + STACKSIZE(SP) - ld INCY, 128 + 
STACKSIZE(SP) - ld BUFFER, 136 + STACKSIZE(SP) + ld INCX, FRAMESLOT(0) + STACKSIZE(SP) + ld Y, FRAMESLOT(1) + STACKSIZE(SP) + ld INCY, FRAMESLOT(2) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/zger.S b/kernel/power/zger.S index 01cb90731..a9a607815 100644 --- a/kernel/power/zger.S +++ b/kernel/power/zger.S @@ -237,36 +237,36 @@ #ifdef linux #ifndef __64BIT__ - lwz LDA, 8 + STACKSIZE(SP) - lwz BUFFER, 12 + STACKSIZE(SP) + lwz LDA, FRAMESLOT(0) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(1) + STACKSIZE(SP) #else - ld INCY, 112 + STACKSIZE(SP) - ld A, 120 + STACKSIZE(SP) - ld LDA, 128 + STACKSIZE(SP) - ld BUFFER, 136 + STACKSIZE(SP) + ld INCY, FRAMESLOT(0) + STACKSIZE(SP) + ld A, FRAMESLOT(1) + STACKSIZE(SP) + ld LDA, FRAMESLOT(2) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ #ifdef DOUBLE - lwz INCX, 56 + STACKSIZE(SP) - lwz Y, 60 + STACKSIZE(SP) - lwz INCY, 64 + STACKSIZE(SP) - lwz A, 68 + STACKSIZE(SP) - lwz LDA, 72 + STACKSIZE(SP) - lwz BUFFER, 76 + STACKSIZE(SP) + lwz INCX, FRAMESLOT(0) + STACKSIZE(SP) + lwz Y, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(2) + STACKSIZE(SP) + lwz A, FRAMESLOT(3) + STACKSIZE(SP) + lwz LDA, FRAMESLOT(4) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(5) + STACKSIZE(SP) #else - lwz INCY, 56 + STACKSIZE(SP) - lwz A, 60 + STACKSIZE(SP) - lwz LDA, 64 + STACKSIZE(SP) - lwz BUFFER, 68 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) + lwz A, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDA, FRAMESLOT(2) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #endif #else - ld INCY, 112 + STACKSIZE(SP) - ld A, 120 + STACKSIZE(SP) - ld LDA, 128 + STACKSIZE(SP) - ld BUFFER, 136 + STACKSIZE(SP) + ld INCY, FRAMESLOT(0) + STACKSIZE(SP) + ld A, FRAMESLOT(1) + STACKSIZE(SP) + ld LDA, FRAMESLOT(2) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(3) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/zswap.S 
b/kernel/power/zswap.S index 048e8ac5f..8befadca2 100644 --- a/kernel/power/zswap.S +++ b/kernel/power/zswap.S @@ -118,19 +118,19 @@ stfd f31, 136(SP) #if defined(linux) && defined(__64BIT__) - ld INCY, 112 + STACKSIZE(SP) + ld INCY, FRAMESLOT(0) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld INCY, 112 + STACKSIZE(SP) + ld INCY, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz INCX, 56 + STACKSIZE(SP) - lwz Y, 60 + STACKSIZE(SP) - lwz INCY, 64 + STACKSIZE(SP) + lwz INCX, FRAMESLOT(0) + STACKSIZE(SP) + lwz Y, FRAMESLOT(1) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz INCY, 56 + STACKSIZE(SP) + lwz INCY, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/zsymv_L.S b/kernel/power/zsymv_L.S index ad4a8cd5c..b348e328f 100644 --- a/kernel/power/zsymv_L.S +++ b/kernel/power/zsymv_L.S @@ -261,31 +261,31 @@ #ifdef linux #ifndef __64BIT__ - lwz BUFFER, 56 + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(0) + STACKSIZE(SP) #else - ld Y, 112 + STACKSIZE(SP) - ld INCY, 120 + STACKSIZE(SP) - ld BUFFER, 128 + STACKSIZE(SP) + ld Y, FRAMESLOT(0) + STACKSIZE(SP) + ld INCY, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ #ifdef DOUBLE - lwz X, 56 + STACKSIZE(SP) - lwz INCX, 60 + STACKSIZE(SP) - lwz Y, 64 + STACKSIZE(SP) - lwz INCY, 68 + STACKSIZE(SP) - lwz BUFFER, 72 + STACKSIZE(SP) + lwz X, FRAMESLOT(0) + STACKSIZE(SP) + lwz INCX, FRAMESLOT(1) + STACKSIZE(SP) + lwz Y, FRAMESLOT(2) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(3) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(4) + STACKSIZE(SP) #else - lwz Y, 56 + STACKSIZE(SP) - lwz INCY, 60 + STACKSIZE(SP) - lwz BUFFER, 64 + STACKSIZE(SP) + lwz Y, FRAMESLOT(0) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(1) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #else - ld Y, 112 + STACKSIZE(SP) - ld INCY, 120 + STACKSIZE(SP) - ld BUFFER, 128 + STACKSIZE(SP) + ld Y, 
FRAMESLOT(0) + STACKSIZE(SP) + ld INCY, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/zsymv_U.S b/kernel/power/zsymv_U.S index 4032b66bb..b631cbe35 100644 --- a/kernel/power/zsymv_U.S +++ b/kernel/power/zsymv_U.S @@ -258,31 +258,31 @@ #ifdef linux #ifndef __64BIT__ - lwz BUFFER, 56 + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(0) + STACKSIZE(SP) #else - ld Y, 112 + STACKSIZE(SP) - ld INCY, 120 + STACKSIZE(SP) - ld BUFFER, 128 + STACKSIZE(SP) + ld Y, FRAMESLOT(0) + STACKSIZE(SP) + ld INCY, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifndef __64BIT__ #ifdef DOUBLE - lwz X, 56 + STACKSIZE(SP) - lwz INCX, 60 + STACKSIZE(SP) - lwz Y, 64 + STACKSIZE(SP) - lwz INCY, 68 + STACKSIZE(SP) - lwz BUFFER, 72 + STACKSIZE(SP) + lwz X, FRAMESLOT(0) + STACKSIZE(SP) + lwz INCX, FRAMESLOT(1) + STACKSIZE(SP) + lwz Y, FRAMESLOT(2) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(3) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(4) + STACKSIZE(SP) #else - lwz Y, 56 + STACKSIZE(SP) - lwz INCY, 60 + STACKSIZE(SP) - lwz BUFFER, 64 + STACKSIZE(SP) + lwz Y, FRAMESLOT(0) + STACKSIZE(SP) + lwz INCY, FRAMESLOT(1) + STACKSIZE(SP) + lwz BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #else - ld Y, 112 + STACKSIZE(SP) - ld INCY, 120 + STACKSIZE(SP) - ld BUFFER, 128 + STACKSIZE(SP) + ld Y, FRAMESLOT(0) + STACKSIZE(SP) + ld INCY, FRAMESLOT(1) + STACKSIZE(SP) + ld BUFFER, FRAMESLOT(2) + STACKSIZE(SP) #endif #endif diff --git a/kernel/power/ztrsm_kernel_LN.S b/kernel/power/ztrsm_kernel_LN.S index 64fb96823..87473b45d 100644 --- a/kernel/power/ztrsm_kernel_LN.S +++ b/kernel/power/ztrsm_kernel_LN.S @@ -168,36 +168,36 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + 
STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif @@ -246,25 +246,25 @@ #ifdef linux #ifndef __64BIT__ - lwz PREA, 16 + STACKSIZE(SP) - lwz PREC, 20 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 72 + STACKSIZE(SP) - lwz PREC, 76 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(4) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(5) + STACKSIZE(SP) #else - lwz PREA, 68 + STACKSIZE(SP) - lwz PREC, 72 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(3) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/ztrsm_kernel_LT.S b/kernel/power/ztrsm_kernel_LT.S index ae4615cf5..db0860124 100644 --- a/kernel/power/ztrsm_kernel_LT.S +++ b/kernel/power/ztrsm_kernel_LT.S @@ -168,36 +168,36 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef 
__64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif @@ -249,25 +249,25 @@ #ifdef linux #ifndef __64BIT__ - lwz PREA, 16 + STACKSIZE(SP) - lwz PREC, 20 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 72 + STACKSIZE(SP) - lwz PREC, 76 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(4) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(5) + STACKSIZE(SP) #else - lwz PREA, 68 + STACKSIZE(SP) - lwz PREC, 72 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(3) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/ztrsm_kernel_RT.S b/kernel/power/ztrsm_kernel_RT.S index f756dda77..c50ab86df 100644 --- a/kernel/power/ztrsm_kernel_RT.S +++ b/kernel/power/ztrsm_kernel_RT.S @@ -168,36 +168,36 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + 
STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif @@ -249,25 +249,25 @@ #ifdef linux #ifndef __64BIT__ - lwz PREA, 16 + STACKSIZE(SP) - lwz PREC, 20 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 72 + STACKSIZE(SP) - lwz PREC, 76 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(4) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(5) + STACKSIZE(SP) #else - lwz PREA, 68 + STACKSIZE(SP) - lwz PREC, 72 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(3) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/ztrsm_kernel_cell_LN.S b/kernel/power/ztrsm_kernel_cell_LN.S index 2427a4ddd..884a3e864 100644 --- 
a/kernel/power/ztrsm_kernel_cell_LN.S +++ b/kernel/power/ztrsm_kernel_cell_LN.S @@ -174,36 +174,36 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/ztrsm_kernel_cell_LT.S b/kernel/power/ztrsm_kernel_cell_LT.S index 0d88ded9a..388dfe3c2 100644 --- a/kernel/power/ztrsm_kernel_cell_LT.S +++ b/kernel/power/ztrsm_kernel_cell_LT.S @@ -174,36 +174,36 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) 
+ STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif @@ -248,25 +248,25 @@ #ifdef linux #ifndef __64BIT__ - lwz PREA, 16 + STACKSIZE(SP) - lwz PREC, 20 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(2) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(3) + STACKSIZE(SP) #else - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld PREA, 136 + STACKSIZE(SP) - ld PREC, 144 + STACKSIZE(SP) + ld PREA, FRAMESLOT(3) + STACKSIZE(SP) + ld PREC, FRAMESLOT(4) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz PREA, 72 + STACKSIZE(SP) - lwz PREC, 76 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(4) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(5) + STACKSIZE(SP) #else - lwz PREA, 68 + STACKSIZE(SP) - lwz PREC, 72 + STACKSIZE(SP) + lwz PREA, FRAMESLOT(3) + STACKSIZE(SP) + lwz PREC, FRAMESLOT(4) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/ztrsm_kernel_cell_RT.S b/kernel/power/ztrsm_kernel_cell_RT.S index 84f2089fa..00b50fe04 100644 --- a/kernel/power/ztrsm_kernel_cell_RT.S +++ b/kernel/power/ztrsm_kernel_cell_RT.S @@ -174,36 +174,36 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + 
STACKSIZE(SP) #endif #endif #endif #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/ztrsm_kernel_power6_LN.S b/kernel/power/ztrsm_kernel_power6_LN.S index 42239bb55..65b8077db 100644 --- a/kernel/power/ztrsm_kernel_power6_LN.S +++ b/kernel/power/ztrsm_kernel_power6_LN.S @@ -186,36 +186,36 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/ztrsm_kernel_power6_LT.S b/kernel/power/ztrsm_kernel_power6_LT.S index dfae4d60b..c27170604 100644 --- a/kernel/power/ztrsm_kernel_power6_LT.S +++ b/kernel/power/ztrsm_kernel_power6_LT.S @@ -186,36 +186,36 @@ #ifdef linux #ifdef 
__64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/ztrsm_kernel_power6_RT.S b/kernel/power/ztrsm_kernel_power6_RT.S index 79f8b70b8..ff0338cdc 100644 --- a/kernel/power/ztrsm_kernel_power6_RT.S +++ b/kernel/power/ztrsm_kernel_power6_RT.S @@ -186,36 +186,36 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + 
ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/ztrsm_kernel_ppc440_LN.S b/kernel/power/ztrsm_kernel_ppc440_LN.S index 51db71903..d33522456 100644 --- a/kernel/power/ztrsm_kernel_ppc440_LN.S +++ b/kernel/power/ztrsm_kernel_ppc440_LN.S @@ -179,36 +179,36 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/ztrsm_kernel_ppc440_LT.S b/kernel/power/ztrsm_kernel_ppc440_LT.S index b5e23b3c6..a9e7b891f 100644 --- a/kernel/power/ztrsm_kernel_ppc440_LT.S +++ b/kernel/power/ztrsm_kernel_ppc440_LT.S @@ -179,36 +179,36 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + 
STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif diff --git a/kernel/power/ztrsm_kernel_ppc440_RT.S b/kernel/power/ztrsm_kernel_ppc440_RT.S index 2bb374d22..43f4b07cb 100644 --- a/kernel/power/ztrsm_kernel_ppc440_RT.S +++ b/kernel/power/ztrsm_kernel_ppc440_RT.S @@ -179,36 +179,36 @@ #ifdef linux #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld LDC, 112 + STACKSIZE(SP) + ld LDC, FRAMESLOT(0) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz B, 56 + STACKSIZE(SP) - lwz C, 60 + STACKSIZE(SP) - lwz LDC, 64 + STACKSIZE(SP) + lwz B, FRAMESLOT(0) + STACKSIZE(SP) + lwz C, FRAMESLOT(1) + STACKSIZE(SP) + lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) #else - lwz LDC, 56 + STACKSIZE(SP) + lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) #endif #endif #endif #if defined(linux) && defined(__64BIT__) - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #if defined(_AIX) || defined(__APPLE__) #ifdef __64BIT__ - ld OFFSET, 120 + STACKSIZE(SP) + ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #else #ifdef DOUBLE - lwz OFFSET, 68 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) #else - lwz OFFSET, 60 + STACKSIZE(SP) + lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) #endif #endif #endif From 
b8d64a856a6d0e345ae60e0ee66f47c6900c4ab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A1bio=20Perez?= Date: Wed, 5 Aug 2015 11:02:39 -0300 Subject: [PATCH 02/29] Add POWER7/POWER8 as targets --- TargetList.txt | 2 ++ cpuid_power.c | 1 + getarch.c | 4 +++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/TargetList.txt b/TargetList.txt index 0a9d8b40c..b2878ba32 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -44,6 +44,8 @@ NANO POWER4 POWER5 POWER6 +POWER7 +POWER8 PPCG4 PPC970 PPC970MP diff --git a/cpuid_power.c b/cpuid_power.c index 2fc333dd2..366c6ed08 100644 --- a/cpuid_power.c +++ b/cpuid_power.c @@ -115,6 +115,7 @@ int detect(void){ if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5; if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; + if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER6; if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; diff --git a/getarch.c b/getarch.c index d6ecaeb62..d56a37a7a 100644 --- a/getarch.c +++ b/getarch.c @@ -116,6 +116,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /* #define FORCE_POWER4 */ /* #define FORCE_POWER5 */ /* #define FORCE_POWER6 */ +/* #define FORCE_POWER7 */ +/* #define FORCE_POWER8 */ /* #define FORCE_PPCG4 */ /* #define FORCE_PPC970 */ /* #define FORCE_PPC970MP */ @@ -546,7 +548,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define CORENAME "POWER5" #endif -#ifdef FORCE_POWER6 +#if defined(FORCE_POWER6) || defined(FORCE_POWER7) || defined(FORCE_POWER8) #define FORCE #define ARCHITECTURE "POWER" #define SUBARCHITECTURE "POWER6" From 87336b9acf1216927b911f19e8417035f420f69c Mon Sep 17 00:00:00 2001 From: xantares Date: Thu, 6 Aug 2015 20:03:50 +0200 Subject: [PATCH 03/29] install OpenBLASConfigVersion.cmake --- Makefile.install | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Makefile.install b/Makefile.install index a5814e55a..9814302b0 100644 --- a/Makefile.install +++ b/Makefile.install @@ -11,6 +11,7 @@ OPENBLAS_BINARY_DIR := $(PREFIX)/bin OPENBLAS_BUILD_DIR := $(CURDIR) OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake +OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake .PHONY : install .NOTPARALLEL : install @@ -97,6 +98,7 @@ endif @echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) @echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) @echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) + ifndef NO_SHARED #ifeq logical or ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD)) @@ -112,5 +114,16 @@ else #only static @echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) endif +#Generating OpenBLASConfigVersion.cmake + @echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) + @echo "set (PACKAGE_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) + @echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) + @echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) + @echo "else 
()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) + @echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) + @echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) + @echo " set (PACKAGE_VERSION_EXACT TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) + @echo " endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) + @echo "endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) @echo Install OK! From c2323dd4d2a65420f77c73f7b55c41ba469a47f8 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 16 Aug 2015 15:18:42 +0200 Subject: [PATCH 04/29] really fix ARM locking - was writing 0 to lock variable, so was ineffective - only exit loop if both lock was 0 and strex was successful --- common_arm.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/common_arm.h b/common_arm.h index 135191057..2dabd4d7f 100644 --- a/common_arm.h +++ b/common_arm.h @@ -59,22 +59,16 @@ static void __inline blas_lock(volatile BLASULONG *address){ while (*address) {YIELDING;}; __asm__ __volatile__( - "1: \n\t" - "ldrex r2, [%1] \n\t" - "mov r2, #0 \n\t" - "strex r3, r2, [%1] \n\t" - "cmp r3, #0 \n\t" - "bne 1b \n\t" - "mov %0 , r3 \n\t" - : "=r"(ret), "=r"(address) - : "1"(address) - : "memory", "r2" , "r3" - - + "ldrex r2, [%1] \n\t" + "strex %0, %2, [%1] \n\t" + "orr %0, r2 \n\t" + : "=&r"(ret) + : "r"(address), "r"(1) + : "memory", "r2" ); } while (ret); - + MB; } From d3e2f0a1af73a6e74258294c911e7f4cb72d8ab5 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 16 Aug 2015 15:37:02 +0200 Subject: [PATCH 05/29] add missing barriers should fix issue #597 --- driver/others/blas_server.c | 11 ++++++++++- driver/others/memory.c | 3 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/driver/others/blas_server.c 
b/driver/others/blas_server.c index b3b1ce7bd..1fd848c6b 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -425,6 +425,10 @@ static int blas_thread_server(void *arg){ main_status[cpu] = MAIN_FINISH; #endif + // arm: make sure all results are written out _before_ + // thread is marked as done and other threads use them + WMB; + thread_status[cpu].queue = (blas_queue_t * volatile) ((long)thread_status[cpu].queue & 0); /* Need a trick */ WMB; @@ -775,7 +779,12 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){ stop = rpcc(); #endif - if ((num > 1) && queue -> next) exec_blas_async_wait(num - 1, queue -> next); + if ((num > 1) && queue -> next) { + exec_blas_async_wait(num - 1, queue -> next); + + // arm: make sure results from other threads are visible + MB; + } #ifdef TIMING_DEBUG fprintf(STDERR, "Thread[0] : %16lu %16lu (%8lu cycles)\n", diff --git a/driver/others/memory.c b/driver/others/memory.c index a562da377..49c57f911 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -1153,6 +1153,9 @@ void blas_memory_free(void *free_area){ printf(" Position : %d\n", position); #endif + // arm: ensure all writes are finished before other thread takes this memory + WMB; + memory[position].used = 0; #ifdef DEBUG From e12cf1123e8784ce6fe9d2ac14526331fbe2c555 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 16 Aug 2015 17:27:25 +0200 Subject: [PATCH 06/29] add fallback rpcc implementation - use on arm, arm64 and any new platform - use faster integer math instead of double - use similar scale as rdtsc so that timeouts work --- common.h | 28 ++++++++++++++++++++++++++++ common_alpha.h | 1 + common_arm.h | 10 ---------- common_arm64.h | 10 ---------- common_ia64.h | 2 ++ common_mips64.h | 1 + common_power.h | 1 + common_sparc.h | 1 + common_x86.h | 1 + common_x86_64.h | 1 + 10 files changed, 36 insertions(+), 20 deletions(-) diff --git a/common.h b/common.h index 320adadcb..5998b5608 100644 --- a/common.h +++ b/common.h @@ 
-410,7 +410,35 @@ typedef char env_var_t[MAX_PATH]; typedef char* env_var_t; #define readenv(p, n) ((p)=getenv(n)) #endif + +#if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS) +#ifdef _POSIX_MONOTONIC_CLOCK +#if defined(__GNUC_PREREQ) && __GLIBC_PREREQ(2, 17) // don't require -lrt +#define USE_MONOTONIC +#elif defined(OS_ANDROID) +#define USE_MONOTONIC #endif +#endif +/* use similar scale as x86 rdtsc for timeouts to work correctly */ +static inline unsigned long long rpcc(void){ +#ifdef USE_MONOTONIC + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec; +#else + struct timeval tv; + gettimeofday(&tv,NULL); + return (unsigned long long)tv.tv_sec * 1000000000ull + tv.tv_usec * 1000; +#endif +} +#define RPCC_DEFINED +#define RPCC64BIT +#endif // !RPCC_DEFINED + +#ifndef RPCC_DEFINED +#error "rpcc() implementation is missing for your platform" +#endif +#endif // !ASSEMBLER #ifdef OS_LINUX #include "common_linux.h" diff --git a/common_alpha.h b/common_alpha.h index 845fb316a..86f58966a 100644 --- a/common_alpha.h +++ b/common_alpha.h @@ -89,6 +89,7 @@ static __inline unsigned int rpcc(void){ return r0; } +#define RPCC_DEFINED #define HALT ldq $0, 0($0) diff --git a/common_arm.h b/common_arm.h index 2dabd4d7f..7e0c02306 100644 --- a/common_arm.h +++ b/common_arm.h @@ -72,16 +72,6 @@ static void __inline blas_lock(volatile BLASULONG *address){ } -static inline unsigned long long rpcc(void){ - unsigned long long ret=0; - double v; - struct timeval tv; - gettimeofday(&tv,NULL); - v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6; - ret = (unsigned long long) ( v * 1000.0d ); - return ret; -} - static inline int blas_quickdivide(blasint x, blasint y){ return x / y; } diff --git a/common_arm64.h b/common_arm64.h index aa310c5f2..cc08fa75b 100644 --- a/common_arm64.h +++ b/common_arm64.h @@ -71,16 +71,6 @@ static void __inline blas_lock(volatile BLASULONG *address){ } -static inline unsigned long long 
rpcc(void){ - unsigned long long ret=0; - double v; - struct timeval tv; - gettimeofday(&tv,NULL); - v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6; - ret = (unsigned long long) ( v * 1000.0d ); - return ret; -} - static inline int blas_quickdivide(blasint x, blasint y){ return x / y; } diff --git a/common_ia64.h b/common_ia64.h index 8e92b5992..d1f210749 100644 --- a/common_ia64.h +++ b/common_ia64.h @@ -75,6 +75,7 @@ static __inline unsigned long rpcc(void) { __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(clocks)); return clocks; } +#define RPCC_DEFINED static __inline unsigned long stmxcsr(void){ @@ -103,6 +104,7 @@ static __inline void blas_lock(volatile unsigned long *address){ static __inline unsigned int rpcc(void) { return __getReg(_IA64_REG_AR_ITC); } +#define RPCC_DEFINED static __inline unsigned int stmxcsr(void) { return __getReg(_IA64_REG_AR_FPSR); diff --git a/common_mips64.h b/common_mips64.h index 7cd86b375..bc1a52fb4 100644 --- a/common_mips64.h +++ b/common_mips64.h @@ -118,6 +118,7 @@ static inline unsigned int rpcc(void){ #endif return ret; } +#define RPCC_DEFINED #if defined(LOONGSON3A) || defined(LOONGSON3B) #ifndef NO_AFFINITY diff --git a/common_power.h b/common_power.h index e9b5cb630..3b9471a17 100644 --- a/common_power.h +++ b/common_power.h @@ -103,6 +103,7 @@ static inline unsigned long rpcc(void){ #endif } +#define RPCC_DEFINED #ifdef __64BIT__ #define RPCC64BIT diff --git a/common_sparc.h b/common_sparc.h index 87ef75276..8a16e3d3a 100644 --- a/common_sparc.h +++ b/common_sparc.h @@ -66,6 +66,7 @@ static __inline unsigned long rpcc(void){ return clocks; }; +#define RPCC_DEFINED #ifdef __64BIT__ #define RPCC64BIT diff --git a/common_x86.h b/common_x86.h index 99a723fd7..9506716ce 100644 --- a/common_x86.h +++ b/common_x86.h @@ -73,6 +73,7 @@ static __inline unsigned long long rpcc(void){ return ((unsigned long long)a + ((unsigned long long)d << 32)); }; +#define RPCC_DEFINED static __inline unsigned long getstackaddr(void){ unsigned 
long addr; diff --git a/common_x86_64.h b/common_x86_64.h index efb902416..3a02beefb 100644 --- a/common_x86_64.h +++ b/common_x86_64.h @@ -82,6 +82,7 @@ static __inline BLASULONG rpcc(void){ return ((BLASULONG)a + ((BLASULONG)d << 32)); } +#define RPCC_DEFINED #define RPCC64BIT From f2ac1a5cee9eebfaad33194e362fa2c05e2b05d9 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 16 Aug 2015 18:08:45 +0200 Subject: [PATCH 07/29] set ARMV7 for Cortex-A9 and Cortex-A15 otherwise some macros like YIELDING are not defined correctly --- common_arm.h | 4 ++++ cpuid_arm.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/common_arm.h b/common_arm.h index 7e0c02306..74b6378dd 100644 --- a/common_arm.h +++ b/common_arm.h @@ -124,4 +124,8 @@ REALNAME: #define MAP_ANONYMOUS MAP_ANON #endif +#if !defined(ARMV5) && !defined(ARMV6) && !defined(ARMV7) && !defined(ARMV8) +#error "you must define ARMV5, ARMV6, ARMV7 or ARMV8" +#endif + #endif diff --git a/cpuid_arm.c b/cpuid_arm.c index 51ba72d70..6485003f3 100644 --- a/cpuid_arm.c +++ b/cpuid_arm.c @@ -192,6 +192,7 @@ void get_cpuconfig(void) { case CPU_CORTEXA9: printf("#define CORTEXA9\n"); + printf("#define ARMV7\n"); printf("#define HAVE_VFP\n"); printf("#define HAVE_VFPV3\n"); if ( get_feature("neon")) printf("#define HAVE_NEON\n"); @@ -207,6 +208,7 @@ void get_cpuconfig(void) case CPU_CORTEXA15: printf("#define CORTEXA15\n"); + printf("#define ARMV7\n"); printf("#define HAVE_VFP\n"); printf("#define HAVE_VFPV3\n"); if ( get_feature("neon")) printf("#define HAVE_NEON\n"); From 6b92204a7ce5faf8dab2301c59aa69a26f6b8a19 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 16 Aug 2015 18:10:34 +0200 Subject: [PATCH 08/29] add fallback blas_lock implementation to be used on armv5 and new platforms --- common.h | 14 ++++++++++++++ common_alpha.h | 1 + common_arm.h | 4 ++++ common_arm64.h | 1 + common_ia64.h | 2 ++ common_mips64.h | 1 + common_power.h | 1 + common_sparc.h | 1 + common_x86.h | 1 + common_x86_64.h | 1 + 
10 files changed, 27 insertions(+) diff --git a/common.h b/common.h index 5998b5608..6073f037f 100644 --- a/common.h +++ b/common.h @@ -435,9 +435,23 @@ static inline unsigned long long rpcc(void){ #define RPCC64BIT #endif // !RPCC_DEFINED +#if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__) +static void __inline blas_lock(volatile BLASULONG *address){ + + do { + while (*address) {YIELDING;}; + + } while (!__sync_bool_compare_and_swap(address, 0, 1)); +} +#define BLAS_LOCK_DEFINED +#endif + #ifndef RPCC_DEFINED #error "rpcc() implementation is missing for your platform" #endif +#ifndef BLAS_LOCK_DEFINED +#error "blas_lock() implementation is missing for your platform" +#endif #endif // !ASSEMBLER #ifdef OS_LINUX diff --git a/common_alpha.h b/common_alpha.h index 86f58966a..9739c941d 100644 --- a/common_alpha.h +++ b/common_alpha.h @@ -76,6 +76,7 @@ static void __inline blas_lock(unsigned long *address){ "30:", address); #endif } +#define BLAS_LOCK_DEFINED static __inline unsigned int rpcc(void){ diff --git a/common_arm.h b/common_arm.h index 74b6378dd..84691d766 100644 --- a/common_arm.h +++ b/common_arm.h @@ -51,6 +51,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifndef ASSEMBLER +#if defined(ARMV6) || defined(ARMV7) || defined(ARMV8) + static void __inline blas_lock(volatile BLASULONG *address){ int register ret; @@ -71,6 +73,8 @@ static void __inline blas_lock(volatile BLASULONG *address){ MB; } +#define BLAS_LOCK_DEFINED +#endif static inline int blas_quickdivide(blasint x, blasint y){ return x / y; diff --git a/common_arm64.h b/common_arm64.h index cc08fa75b..c4e588d1f 100644 --- a/common_arm64.h +++ b/common_arm64.h @@ -69,6 +69,7 @@ static void __inline blas_lock(volatile BLASULONG *address){ } while (ret); } +#define BLAS_LOCK_DEFINED static inline int blas_quickdivide(blasint x, blasint y){ diff --git a/common_ia64.h b/common_ia64.h index d1f210749..72b75fc4e 100644 --- a/common_ia64.h +++ b/common_ia64.h @@ -68,6 +68,7 @@ static __inline void blas_lock(volatile unsigned long *address){ : "ar.ccv", "memory"); } while (ret); } +#define BLAS_LOCK_DEFINED static __inline unsigned long rpcc(void) { unsigned long clocks; @@ -100,6 +101,7 @@ static __inline void blas_lock(volatile unsigned long *address){ while (*address || _InterlockedCompareExchange((volatile int *) address,1,0)) ; } +#define BLAS_LOCK_DEFINED static __inline unsigned int rpcc(void) { return __getReg(_IA64_REG_AR_ITC); diff --git a/common_mips64.h b/common_mips64.h index bc1a52fb4..f5c0ec7cf 100644 --- a/common_mips64.h +++ b/common_mips64.h @@ -98,6 +98,7 @@ static void INLINE blas_lock(volatile unsigned long *address){ } while (ret); } +#define BLAS_LOCK_DEFINED static inline unsigned int rpcc(void){ unsigned long ret; diff --git a/common_power.h b/common_power.h index 3b9471a17..ab331b04a 100644 --- a/common_power.h +++ b/common_power.h @@ -87,6 +87,7 @@ static void INLINE blas_lock(volatile unsigned long *address){ #endif } while (ret); } +#define BLAS_LOCK_DEFINED static inline unsigned long rpcc(void){ unsigned long ret; diff --git a/common_sparc.h b/common_sparc.h index 8a16e3d3a..f99972db9 100644 --- a/common_sparc.h +++ b/common_sparc.h @@ 
-58,6 +58,7 @@ static void __inline blas_lock(volatile unsigned long *address){ : "memory"); } while (ret); } +#define BLAS_LOCK_DEFINED static __inline unsigned long rpcc(void){ unsigned long clocks; diff --git a/common_x86.h b/common_x86.h index 9506716ce..6c90432a2 100644 --- a/common_x86.h +++ b/common_x86.h @@ -65,6 +65,7 @@ static void __inline blas_lock(volatile BLASULONG *address){ } while (ret); } +#define BLAS_LOCK_DEFINED static __inline unsigned long long rpcc(void){ unsigned int a, d; diff --git a/common_x86_64.h b/common_x86_64.h index 3a02beefb..4c783b315 100644 --- a/common_x86_64.h +++ b/common_x86_64.h @@ -74,6 +74,7 @@ static void __inline blas_lock(volatile BLASULONG *address){ } while (ret); } +#define BLAS_LOCK_DEFINED static __inline BLASULONG rpcc(void){ BLASULONG a, d; From d38a1ddc7a4ef8c10017ae5b81a447e322721b94 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 16 Aug 2015 18:13:30 +0200 Subject: [PATCH 09/29] use real armv5 support there is no more requirement for ARMv6 instructions, and VFP on ARMv5 is uncommon --- Makefile.arm | 4 ++-- common_arm.h | 5 ++++- getarch.c | 3 +-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Makefile.arm b/Makefile.arm index 2f7b33730..272220ca9 100644 --- a/Makefile.arm +++ b/Makefile.arm @@ -26,8 +26,8 @@ endif ifeq ($(CORE), ARMV5) -CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 -FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 +CCOMMON_OPT += -marm -march=armv5 +FCOMMON_OPT += -marm -march=armv5 endif diff --git a/common_arm.h b/common_arm.h index 84691d766..6bf836835 100644 --- a/common_arm.h +++ b/common_arm.h @@ -80,7 +80,10 @@ static inline int blas_quickdivide(blasint x, blasint y){ return x / y; } -#if defined(DOUBLE) +#if !defined(HAVE_VFP) +/* no FPU, soft float */ +#define GET_IMAGE(res) +#elif defined(DOUBLE) #define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory") #else #define GET_IMAGE(res) __asm__ 
__volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory") diff --git a/getarch.c b/getarch.c index d56a37a7a..89e736a31 100644 --- a/getarch.c +++ b/getarch.c @@ -798,8 +798,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHCONFIG "-DARMV5 " \ "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ - "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ - "-DHAVE_VFP" + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " #define LIBNAME "armv5" #define CORENAME "ARMV5" #else From 3efeaed0d867c9d54701e9351de44e747cd21578 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Sun, 16 Aug 2015 20:11:13 +0200 Subject: [PATCH 10/29] correct a minor mistake --- common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common.h b/common.h index 6073f037f..a607c888b 100644 --- a/common.h +++ b/common.h @@ -413,7 +413,7 @@ typedef char* env_var_t; #if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS) #ifdef _POSIX_MONOTONIC_CLOCK -#if defined(__GNUC_PREREQ) && __GLIBC_PREREQ(2, 17) // don't require -lrt +#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 17) // don't require -lrt #define USE_MONOTONIC #elif defined(OS_ANDROID) #define USE_MONOTONIC From abade3f896634ebf6b9097469a05cb72d8fd7860 Mon Sep 17 00:00:00 2001 From: Grazvydas Ignotas Date: Mon, 17 Aug 2015 01:27:45 +0200 Subject: [PATCH 11/29] really fix ARM64 locking --- common_arm64.h | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/common_arm64.h b/common_arm64.h index c4e588d1f..15987c677 100644 --- a/common_arm64.h +++ b/common_arm64.h @@ -45,29 +45,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
static void __inline blas_lock(volatile BLASULONG *address){ - int register ret; - int register tmp; + long register ret; do { while (*address) {YIELDING;}; __asm__ __volatile__( - "1: \n\t" - "ldaxr %2, [%1] \n\t" - "mov %2, #0 \n\t" - "stlxr %w0, %2, [%1] \n\t" - "cbnz %w0, 1b \n\t" - "mov %0 , #0 \n\t" - : "=r"(ret), "=r"(address), "=r"(tmp) - : "1"(address) - : "memory", "%w0" - //, "%r2" , "%r3" - - + "ldaxr %0, [%1] \n\t" + "stlxr w2, %2, [%1] \n\t" + "orr %0, %0, x2 \n\t" + : "=r"(ret) + : "r"(address), "r"(1l) + : "memory", "x2" ); } while (ret); - + MB; } #define BLAS_LOCK_DEFINED From 7df08201606d903600d96c747c6ac070203a4d6f Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Wed, 19 Aug 2015 08:07:47 -0500 Subject: [PATCH 12/29] Use C kernels for s/dgemv on x86. --- kernel/x86/KERNEL | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/kernel/x86/KERNEL b/kernel/x86/KERNEL index 8b30355ec..39be2ef80 100644 --- a/kernel/x86/KERNEL +++ b/kernel/x86/KERNEL @@ -98,6 +98,23 @@ ifndef XAXPYKERNEL XAXPYKERNEL = xaxpy.S endif +#Use C kernel for sgemv and dgemv +ifndef SGEMVNKERNEL +SGEMVNKERNEL = ../arm/gemv_n.c +endif + +ifndef SGEMVTKERNEL +SGEMVTKERNEL = ../arm/gemv_t.c +endif + +ifndef DGEMVNKERNEL +DGEMVNKERNEL = ../arm/gemv_n.c +endif + +ifndef DGEMVTKERNEL +DGEMVTKERNEL = ../arm/gemv_t.c +endif + ifndef QGEMVNKERNEL QGEMVNKERNEL = qgemv_n.S endif From 50901943fde1fb09cc4149c007873d4e21d424c5 Mon Sep 17 00:00:00 2001 From: The Gitter Badger Date: Thu, 20 Aug 2015 03:21:09 +0000 Subject: [PATCH 13/29] Added Gitter badge --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index cdacf9888..16f874078 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # OpenBLAS +[![Join the chat at https://gitter.im/xianyi/OpenBLAS](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) + [![Build 
Status](https://travis-ci.org/xianyi/OpenBLAS.png?branch=develop)](https://travis-ci.org/xianyi/OpenBLAS) ## Introduction From 5408074941d5cc0f4aad180562cafcf4cf27a56d Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Wed, 19 Aug 2015 22:50:25 -0500 Subject: [PATCH 14/29] Add notification. --- .travis.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.travis.yml b/.travis.yml index 7d625c9dc..806cb0046 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,13 @@ language: c + +notifications: + webhooks: + urls: + - https://webhooks.gitter.im/e/8a6e4470a0cebd090344 + on_success: change # options: [always|never|change] default: always + on_failure: always # options: [always|never|change] default: always + on_start: never # options: [always|never|change] default: always + compiler: - gcc From 2297a2d9893667d3222139013de7398680ec6f1a Mon Sep 17 00:00:00 2001 From: buffer51 Date: Thu, 3 Sep 2015 20:54:21 -0400 Subject: [PATCH 15/29] Fixed error in common.h for Android compilation introduced by e12cf1123e8784ce6fe9d2ac14526331fbe2c555 --- common.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/common.h b/common.h index a607c888b..c367e38cb 100644 --- a/common.h +++ b/common.h @@ -413,8 +413,10 @@ typedef char* env_var_t; #if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS) #ifdef _POSIX_MONOTONIC_CLOCK -#if defined(__GLIBC_PREREQ) && __GLIBC_PREREQ(2, 17) // don't require -lrt +#if defined(__GLIBC_PREREQ) // cut the if condition if two lines, otherwise will fail at __GLIBC_PREREQ(2, 17) +#if __GLIBC_PREREQ(2, 17) // don't require -lrt #define USE_MONOTONIC +#endif #elif defined(OS_ANDROID) #define USE_MONOTONIC #endif From 711ca33bc6da03daf2115c7a82ae2a56f73d67a3 Mon Sep 17 00:00:00 2001 From: Martin Koehler Date: Mon, 7 Sep 2015 14:33:26 +0200 Subject: [PATCH 16/29] Improved Ximatcopy when lda==ldb. The Ximatcopy functions create a copy of the input matrix although they seem to work inplace. 
The new routines XIMATCOPY_K_YY perform the operations inplace if the leading dimension does not change. --- CONTRIBUTORS.md | 3 + common_c.h | 19 ++++ common_d.h | 9 ++ common_level3.h | 24 +++++ common_macro.h | 26 +++++ common_param.h | 30 ++++++ common_s.h | 8 ++ common_z.h | 18 ++++ interface/imatcopy.c | 35 ++++++- interface/zimatcopy.c | 50 ++++++++- kernel/Makefile.L3 | 181 +++++++++++++++++++++++++++++++++ kernel/generic/imatcopy_cn.c | 67 ++++++++++++ kernel/generic/imatcopy_ct.c | 91 +++++++++++++++++ kernel/generic/imatcopy_rn.c | 72 +++++++++++++ kernel/generic/imatcopy_rt.c | 64 ++++++++++++ kernel/generic/zimatcopy_cn.c | 67 ++++++++++++ kernel/generic/zimatcopy_cnc.c | 67 ++++++++++++ kernel/generic/zimatcopy_ct.c | 82 +++++++++++++++ kernel/generic/zimatcopy_ctc.c | 85 ++++++++++++++++ kernel/generic/zimatcopy_rn.c | 66 ++++++++++++ kernel/generic/zimatcopy_rnc.c | 65 ++++++++++++ kernel/generic/zimatcopy_rt.c | 80 +++++++++++++++ kernel/generic/zimatcopy_rtc.c | 82 +++++++++++++++ 23 files changed, 1288 insertions(+), 3 deletions(-) create mode 100644 kernel/generic/imatcopy_cn.c create mode 100644 kernel/generic/imatcopy_ct.c create mode 100644 kernel/generic/imatcopy_rn.c create mode 100644 kernel/generic/imatcopy_rt.c create mode 100644 kernel/generic/zimatcopy_cn.c create mode 100644 kernel/generic/zimatcopy_cnc.c create mode 100644 kernel/generic/zimatcopy_ct.c create mode 100644 kernel/generic/zimatcopy_ctc.c create mode 100644 kernel/generic/zimatcopy_rn.c create mode 100644 kernel/generic/zimatcopy_rnc.c create mode 100644 kernel/generic/zimatcopy_rt.c create mode 100644 kernel/generic/zimatcopy_rtc.c diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index b88e3671b..88e461dc4 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -127,5 +127,8 @@ In chronological order: * Ton van den Heuvel * [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity(). 
+* Martin Koehler + * [2015-09-07] Improved imatcopy + * [Your name or handle] <[email or website]> * [Date] [Brief summary of your changes] diff --git a/common_c.h b/common_c.h index 741d7d087..ce0f2a5bd 100644 --- a/common_c.h +++ b/common_c.h @@ -220,6 +220,15 @@ #define COMATCOPY_K_CTC comatcopy_k_ctc #define COMATCOPY_K_RTC comatcopy_k_rtc +#define CIMATCOPY_K_CN cimatcopy_k_cn +#define CIMATCOPY_K_RN cimatcopy_k_rn +#define CIMATCOPY_K_CT cimatcopy_k_ct +#define CIMATCOPY_K_RT cimatcopy_k_rt +#define CIMATCOPY_K_CNC cimatcopy_k_cnc +#define CIMATCOPY_K_RNC cimatcopy_k_rnc +#define CIMATCOPY_K_CTC cimatcopy_k_ctc +#define CIMATCOPY_K_RTC cimatcopy_k_rtc + #define CGEADD_K cgeadd_k #else @@ -403,6 +412,16 @@ #define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc #define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc #define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc + +#define CIMATCOPY_K_CN gotoblas -> cimatcopy_k_cn +#define CIMATCOPY_K_RN gotoblas -> cimatcopy_k_rn +#define CIMATCOPY_K_CT gotoblas -> cimatcopy_k_ct +#define CIMATCOPY_K_RT gotoblas -> cimatcopy_k_rt +#define CIMATCOPY_K_CNC gotoblas -> cimatcopy_k_cnc +#define CIMATCOPY_K_RNC gotoblas -> cimatcopy_k_rnc +#define CIMATCOPY_K_CTC gotoblas -> cimatcopy_k_ctc +#define CIMATCOPY_K_RTC gotoblas -> cimatcopy_k_rtc + #define CGEADD_K gotoblas -> cgeadd_k #endif diff --git a/common_d.h b/common_d.h index d6dfd7f04..ad9945186 100644 --- a/common_d.h +++ b/common_d.h @@ -149,6 +149,11 @@ #define DOMATCOPY_K_RN domatcopy_k_rn #define DOMATCOPY_K_CT domatcopy_k_ct #define DOMATCOPY_K_RT domatcopy_k_rt + +#define DIMATCOPY_K_CN dimatcopy_k_cn +#define DIMATCOPY_K_RN dimatcopy_k_rn +#define DIMATCOPY_K_CT dimatcopy_k_ct +#define DIMATCOPY_K_RT dimatcopy_k_rt #define DGEADD_K dgeadd_k #else @@ -267,6 +272,10 @@ #define DOMATCOPY_K_RN gotoblas -> domatcopy_k_rn #define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct #define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt +#define DIMATCOPY_K_CN gotoblas -> dimatcopy_k_cn 
+#define DIMATCOPY_K_RN gotoblas -> dimatcopy_k_rn +#define DIMATCOPY_K_CT gotoblas -> dimatcopy_k_ct +#define DIMATCOPY_K_RT gotoblas -> dimatcopy_k_rt #define DGEADD_K gotoblas -> dgeadd_k diff --git a/common_level3.h b/common_level3.h index e0ecbc4e2..1f5490baa 100644 --- a/common_level3.h +++ b/common_level3.h @@ -1736,31 +1736,55 @@ int somatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLAS int somatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG); int somatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG); int somatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG); +int simatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG); +int simatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG); +int simatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG); +int simatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG); int domatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); +int dimatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG); +int dimatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG); +int dimatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG); +int dimatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG); int comatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); int comatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); int comatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); int comatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); +int cimatcopy_k_cn(BLASLONG, BLASLONG, float, float, float 
*, BLASLONG); +int cimatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG); +int cimatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG); +int cimatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG); int comatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); int comatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); int comatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); int comatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); +int cimatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG); +int cimatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG); +int cimatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG); +int cimatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG); int zomatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); int zomatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); int zomatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); int zomatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); +int zimatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG); +int zimatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG); +int zimatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG); +int zimatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG); int zomatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); +int 
zimatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG); +int zimatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG); +int zimatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG); +int zimatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG); int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG); int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG); diff --git a/common_macro.h b/common_macro.h index 8555baa67..4976e766f 100644 --- a/common_macro.h +++ b/common_macro.h @@ -634,6 +634,11 @@ #define OMATCOPY_K_RN DOMATCOPY_K_RN #define OMATCOPY_K_CT DOMATCOPY_K_CT #define OMATCOPY_K_RT DOMATCOPY_K_RT +#define IMATCOPY_K_CN DIMATCOPY_K_CN +#define IMATCOPY_K_RN DIMATCOPY_K_RN +#define IMATCOPY_K_CT DIMATCOPY_K_CT +#define IMATCOPY_K_RT DIMATCOPY_K_RT + #define GEADD_K DGEADD_K #else @@ -931,6 +936,10 @@ #define OMATCOPY_K_RN SOMATCOPY_K_RN #define OMATCOPY_K_CT SOMATCOPY_K_CT #define OMATCOPY_K_RT SOMATCOPY_K_RT +#define IMATCOPY_K_CN SIMATCOPY_K_CN +#define IMATCOPY_K_RN SIMATCOPY_K_RN +#define IMATCOPY_K_CT SIMATCOPY_K_CT +#define IMATCOPY_K_RT SIMATCOPY_K_RT #define GEADD_K SGEADD_K #endif @@ -1747,6 +1756,15 @@ #define OMATCOPY_K_RNC ZOMATCOPY_K_RNC #define OMATCOPY_K_CTC ZOMATCOPY_K_CTC #define OMATCOPY_K_RTC ZOMATCOPY_K_RTC +#define IMATCOPY_K_CN ZIMATCOPY_K_CN +#define IMATCOPY_K_RN ZIMATCOPY_K_RN +#define IMATCOPY_K_CT ZIMATCOPY_K_CT +#define IMATCOPY_K_RT ZIMATCOPY_K_RT +#define IMATCOPY_K_CNC ZIMATCOPY_K_CNC +#define IMATCOPY_K_RNC ZIMATCOPY_K_RNC +#define IMATCOPY_K_CTC ZIMATCOPY_K_CTC +#define IMATCOPY_K_RTC ZIMATCOPY_K_RTC + #define GEADD_K ZGEADD_K #else @@ -2160,6 +2178,14 @@ #define OMATCOPY_K_RNC COMATCOPY_K_RNC #define OMATCOPY_K_CTC COMATCOPY_K_CTC #define OMATCOPY_K_RTC COMATCOPY_K_RTC +#define IMATCOPY_K_CN CIMATCOPY_K_CN +#define IMATCOPY_K_RN CIMATCOPY_K_RN +#define IMATCOPY_K_CT CIMATCOPY_K_CT +#define IMATCOPY_K_RT 
CIMATCOPY_K_RT +#define IMATCOPY_K_CNC CIMATCOPY_K_CNC +#define IMATCOPY_K_RNC CIMATCOPY_K_RNC +#define IMATCOPY_K_CTC CIMATCOPY_K_CTC +#define IMATCOPY_K_RTC CIMATCOPY_K_RTC #define GEADD_K CGEADD_K diff --git a/common_param.h b/common_param.h index 1b56e85f0..ab40ddeef 100644 --- a/common_param.h +++ b/common_param.h @@ -830,31 +830,61 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); + int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG); + int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG); + int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG); + int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG); + int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); + int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG); + int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG); + int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG); + int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG); + int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); + int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_ct) (BLASLONG, 
BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*comatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); + int (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); + int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); + int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zomatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, 
double*, BLASLONG); + int (*zimatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); + int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG); diff --git a/common_s.h b/common_s.h index a4d8679b7..3c1600859 100644 --- a/common_s.h +++ b/common_s.h @@ -152,6 +152,10 @@ #define SOMATCOPY_K_RN somatcopy_k_rn #define SOMATCOPY_K_CT somatcopy_k_ct #define SOMATCOPY_K_RT somatcopy_k_rt +#define SIMATCOPY_K_CN simatcopy_k_cn +#define SIMATCOPY_K_RN simatcopy_k_rn +#define SIMATCOPY_K_CT simatcopy_k_ct +#define SIMATCOPY_K_RT simatcopy_k_rt #define SGEADD_K sgeadd_k @@ -274,6 +278,10 @@ #define SOMATCOPY_K_RN gotoblas -> somatcopy_k_rn #define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct #define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt +#define SIMATCOPY_K_CN gotoblas -> simatcopy_k_cn +#define SIMATCOPY_K_RN gotoblas -> simatcopy_k_rn +#define SIMATCOPY_K_CT gotoblas -> simatcopy_k_ct +#define SIMATCOPY_K_RT gotoblas -> simatcopy_k_rt #define SGEADD_K gotoblas -> sgeadd_k diff --git a/common_z.h b/common_z.h index b17122776..b4f58bb0c 100644 --- a/common_z.h +++ b/common_z.h @@ -220,6 +220,15 @@ #define ZOMATCOPY_K_CTC zomatcopy_k_ctc #define ZOMATCOPY_K_RTC zomatcopy_k_rtc +#define ZIMATCOPY_K_CN zimatcopy_k_cn +#define ZIMATCOPY_K_RN zimatcopy_k_rn +#define ZIMATCOPY_K_CT zimatcopy_k_ct +#define ZIMATCOPY_K_RT zimatcopy_k_rt +#define ZIMATCOPY_K_CNC zimatcopy_k_cnc +#define ZIMATCOPY_K_RNC zimatcopy_k_rnc +#define ZIMATCOPY_K_CTC zimatcopy_k_ctc +#define ZIMATCOPY_K_RTC zimatcopy_k_rtc 
+ #define ZGEADD_K zgeadd_k #else @@ -404,6 +413,15 @@ #define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc #define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc +#define ZIMATCOPY_K_CN gotoblas -> zimatcopy_k_cn +#define ZIMATCOPY_K_RN gotoblas -> zimatcopy_k_rn +#define ZIMATCOPY_K_CT gotoblas -> zimatcopy_k_ct +#define ZIMATCOPY_K_RT gotoblas -> zimatcopy_k_rt +#define ZIMATCOPY_K_CNC gotoblas -> zimatcopy_k_cnc +#define ZIMATCOPY_K_RNC gotoblas -> zimatcopy_k_rnc +#define ZIMATCOPY_K_CTC gotoblas -> zimatcopy_k_ctc +#define ZIMATCOPY_K_RTC gotoblas -> zimatcopy_k_rtc + #define ZGEADD_K gotoblas -> zgeadd_k #endif diff --git a/interface/imatcopy.c b/interface/imatcopy.c index 89f0ec823..f4309a85c 100644 --- a/interface/imatcopy.c +++ b/interface/imatcopy.c @@ -26,7 +26,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ /*********************************************************** - * 2014/06/10 Saar + * 2014-06-10 Saar + * 2015-09-07 grisuthedragon ***********************************************************/ #include @@ -50,6 +51,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#undef malloc #undef free +/* Enables the New IMATCOPY code with inplace operation if lda == ldb */ +#define NEW_IMATCOPY + #ifndef CBLAS void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb) { @@ -75,7 +79,6 @@ void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, #else void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, FLOAT calpha, FLOAT *a, blasint clda, blasint cldb) { - char Order, Trans; int order=-1,trans=-1; blasint info = -1; FLOAT *b; @@ -117,6 +120,34 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); return; } +#ifdef NEW_IMATCOPY + if ( *lda == *ldb ) { + if ( order == BlasColMajor ) + { + if ( trans == BlasNoTrans ) + { + IMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda ); + } + else + { + IMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda ); + } + } + else + { + if ( trans == BlasNoTrans ) + { + IMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda ); + } + else + { + IMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda ); + } + } + return; + } + +#endif if ( *lda > *ldb ) msize = (*lda) * (*ldb) * sizeof(FLOAT); diff --git a/interface/zimatcopy.c b/interface/zimatcopy.c index 3f273cf13..798bff585 100644 --- a/interface/zimatcopy.c +++ b/interface/zimatcopy.c @@ -26,7 +26,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ /*********************************************************** - * 2014/06/10 Saar + * 2014-06-10 Saar + * 2015-09-07 grisuthedragon ***********************************************************/ #include @@ -49,6 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define BlasTransConj 2 #define BlasConj 3 +#define NEW_IMATCOPY #ifndef CBLAS void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb) @@ -124,6 +126,52 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, return; } +#ifdef NEW_IMATCOPY + if (*lda == *ldb) { + if ( order == BlasColMajor ) + { + + if ( trans == BlasNoTrans ) + { + IMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda ); + } + if ( trans == BlasConj ) + { + IMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda ); + } + if ( trans == BlasTrans ) + { + IMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda ); + } + if ( trans == BlasTransConj ) + { + IMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda ); + } + } + else + { + + if ( trans == BlasNoTrans ) + { + IMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda ); + } + if ( trans == BlasConj ) + { + IMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda ); + } + if ( trans == BlasTrans ) + { + IMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda ); + } + if ( trans == BlasTransConj ) + { + IMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda ); + } + } + return(0); + } +#endif + if ( *lda > *ldb ) msize = (*lda) * (*ldb) * sizeof(FLOAT) * 2; else diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 4ef351de3..7da4bcb92 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -334,11 +334,15 @@ endif SBLASOBJS += \ somatcopy_k_cn$(TSUFFIX).$(SUFFIX) somatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ + simatcopy_k_cn$(TSUFFIX).$(SUFFIX) simatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ + simatcopy_k_ct$(TSUFFIX).$(SUFFIX) simatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ sgeadd_k$(TSUFFIX).$(SUFFIX) DBLASOBJS += \ domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ + dimatcopy_k_cn$(TSUFFIX).$(SUFFIX) 
dimatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ + dimatcopy_k_ct$(TSUFFIX).$(SUFFIX) dimatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ dgeadd_k$(TSUFFIX).$(SUFFIX) CBLASOBJS += \ @@ -346,6 +350,10 @@ CBLASOBJS += \ comatcopy_k_ct$(TSUFFIX).$(SUFFIX) comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ comatcopy_k_cnc$(TSUFFIX).$(SUFFIX) comatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ + cimatcopy_k_cn$(TSUFFIX).$(SUFFIX) cimatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ + cimatcopy_k_ct$(TSUFFIX).$(SUFFIX) cimatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ + cimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ + cimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ cgeadd_k$(TSUFFIX).$(SUFFIX) ZBLASOBJS += \ @@ -353,6 +361,10 @@ ZBLASOBJS += \ zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ zomatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ + zimatcopy_k_cn$(TSUFFIX).$(SUFFIX) zimatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ + zimatcopy_k_ct$(TSUFFIX).$(SUFFIX) zimatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ + zimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ + zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ zgeadd_k$(TSUFFIX).$(SUFFIX) @@ -3305,6 +3317,34 @@ endif $(KDIR)domatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_RT) $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DROWM $< -o $@ +ifndef DIMATCOPY_CN +DIMATCOPY_CN = ../generic/imatcopy_cn.c +endif + +$(KDIR)dimatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DIMATCOPY_CN) + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ + +ifndef DIMATCOPY_RN +DIMATCOPY_RN = ../generic/imatcopy_rn.c +endif + +$(KDIR)dimatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DIMATCOPY_RN) + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DROWM $< -o $@ + +ifndef DIMATCOPY_CT +DIMATCOPY_CT = ../generic/imatcopy_ct.c +endif + 
+$(KDIR)dimatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DIMATCOPY_CT) + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ + +ifndef DIMATCOPY_RT +DIMATCOPY_RT = ../generic/imatcopy_rt.c +endif + +$(KDIR)dimatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DIMATCOPY_RT) + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DROWM $< -o $@ + ifndef SOMATCOPY_CN SOMATCOPY_CN = ../arm/omatcopy_cn.c endif @@ -3333,6 +3373,34 @@ endif $(KDIR)somatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_RT) $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DROWM $< -o $@ +ifndef SIMATCOPY_CN +SIMATCOPY_CN = ../generic/imatcopy_cn.c +endif + +$(KDIR)simatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SIMATCOPY_CN) + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@ + +ifndef SIMATCOPY_RN +SIMATCOPY_RN = ../generic/imatcopy_rn.c +endif + +$(KDIR)simatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SIMATCOPY_RN) + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DROWM $< -o $@ + +ifndef SIMATCOPY_CT +SIMATCOPY_CT = ../generic/imatcopy_ct.c +endif + +$(KDIR)simatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SIMATCOPY_CT) + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@ + +ifndef SIMATCOPY_RT +SIMATCOPY_RT = ../generic/imatcopy_rt.c +endif + +$(KDIR)simatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SIMATCOPY_RT) + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DROWM $< -o $@ + ifndef COMATCOPY_CN COMATCOPY_CN = ../arm/zomatcopy_cn.c @@ -3390,6 +3458,63 @@ endif $(KDIR)comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RTC) $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ +ifndef CIMATCOPY_CN +CIMATCOPY_CN = ../generic/zimatcopy_cn.c +endif + +$(KDIR)cimatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_CN) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ + +ifndef CIMATCOPY_RN +CIMATCOPY_RN = ../generic/zimatcopy_rn.c +endif + +$(KDIR)cimatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_RN) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -UCONJ 
$< -o $@ + +ifndef CIMATCOPY_CT +CIMATCOPY_CT = ../generic/zimatcopy_ct.c +endif + +$(KDIR)cimatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_CT) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ + +ifndef CIMATCOPY_RT +CIMATCOPY_RT = ../generic/zimatcopy_rt.c +endif + +$(KDIR)cimatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_RT) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ + +ifndef CIMATCOPY_CNC +CIMATCOPY_CNC = ../generic/zimatcopy_cnc.c +endif + +$(KDIR)cimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_CNC) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ + +ifndef CIMATCOPY_RNC +CIMATCOPY_RNC = ../generic/zimatcopy_rnc.c +endif + +$(KDIR)cimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_RNC) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ + +ifndef CIMATCOPY_CTC +CIMATCOPY_CTC = ../generic/zimatcopy_ctc.c +endif + +$(KDIR)cimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_CTC) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ + +ifndef CIMATCOPY_RTC +CIMATCOPY_RTC = ../generic/zimatcopy_rtc.c +endif + +$(KDIR)cimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CIMATCOPY_RTC) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ + + ifndef ZOMATCOPY_CN ZOMATCOPY_CN = ../arm/zomatcopy_cn.c @@ -3447,6 +3572,62 @@ endif $(KDIR)zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RTC) $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ +ifndef ZIMATCOPY_CN +ZIMATCOPY_CN = ../generic/zimatcopy_cn.c +endif + +$(KDIR)zimatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_CN) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ + +ifndef ZIMATCOPY_RN +ZIMATCOPY_RN = ../generic/zimatcopy_rn.c +endif + +$(KDIR)zimatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_RN) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ + +ifndef ZIMATCOPY_CT +ZIMATCOPY_CT = 
../generic/zimatcopy_ct.c +endif + +$(KDIR)zimatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_CT) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ + +ifndef ZIMATCOPY_RT +ZIMATCOPY_RT = ../generic/zimatcopy_rt.c +endif + +$(KDIR)zimatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_RT) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ + +ifndef ZIMATCOPY_CNC +ZIMATCOPY_CNC = ../generic/zimatcopy_cnc.c +endif + +$(KDIR)zimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_CNC) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ + +ifndef ZIMATCOPY_RNC +ZIMATCOPY_RNC = ../generic/zimatcopy_rnc.c +endif + +$(KDIR)zimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_RNC) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ + +ifndef ZIMATCOPY_CTC +ZIMATCOPY_CTC = ../generic/zimatcopy_ctc.c +endif + +$(KDIR)zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_CTC) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ + +ifndef ZIMATCOPY_RTC +ZIMATCOPY_RTC = ../generic/zimatcopy_rtc.c +endif + +$(KDIR)zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZIMATCOPY_RTC) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ + ifndef SGEADD_K SGEADD_K = ../generic/geadd.c diff --git a/kernel/generic/imatcopy_cn.c b/kernel/generic/imatcopy_cn.c new file mode 100644 index 000000000..e63bc976c --- /dev/null +++ b/kernel/generic/imatcopy_cn.c @@ -0,0 +1,67 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" + +/***************************************************** + * 2015-09-07 grisuthedragon +******************************************************/ + +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda) +{ + BLASLONG i,j; + FLOAT *aptr; + + if ( rows <= 0 ) return(0); + if ( cols <= 0 ) return(0); + if ( alpha == 1.0 ) return(0); + + aptr = a; + if ( alpha == 0.0 ) + { + for ( i=0; i Date: Wed, 9 Sep 2015 09:25:48 +0930 Subject: [PATCH 17/29] Fix lantr preparation for row major matrices --- lapack-netlib/lapacke/src/lapacke_clantr.c | 2 +- lapack-netlib/lapacke/src/lapacke_clantr_work.c | 6 +++--- lapack-netlib/lapacke/src/lapacke_dlantr.c | 2 +- lapack-netlib/lapacke/src/lapacke_dlantr_work.c | 6 +++--- lapack-netlib/lapacke/src/lapacke_slantr.c | 2 +- lapack-netlib/lapacke/src/lapacke_slantr_work.c | 6 +++--- lapack-netlib/lapacke/src/lapacke_zlantr.c | 2 +- lapack-netlib/lapacke/src/lapacke_zlantr_work.c | 6 +++--- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/lapack-netlib/lapacke/src/lapacke_clantr.c b/lapack-netlib/lapacke/src/lapacke_clantr.c index 77743f2d5..00ba34273 100644 --- a/lapack-netlib/lapacke/src/lapacke_clantr.c +++ b/lapack-netlib/lapacke/src/lapacke_clantr.c @@ -53,7 +53,7 @@ float LAPACKE_clantr( int matrix_order, char norm, char uplo, char diag, /* Allocate memory for working array(s) */ if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, '0' ) ) { - work = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,m) ); + work = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,MAX(m,n)) ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; diff --git a/lapack-netlib/lapacke/src/lapacke_clantr_work.c b/lapack-netlib/lapacke/src/lapacke_clantr_work.c index cb253a11e..1fa8cd923 100644 --- a/lapack-netlib/lapacke/src/lapacke_clantr_work.c +++ 
b/lapack-netlib/lapacke/src/lapacke_clantr_work.c @@ -47,7 +47,7 @@ float LAPACKE_clantr_work( int matrix_order, char norm, char uplo, info = info - 1; } } else if( matrix_order == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,n); + lapack_int lda_t = MAX(1,m); lapack_complex_float* a_t = NULL; /* Check leading dimension(s) */ if( lda < n ) { @@ -57,13 +57,13 @@ float LAPACKE_clantr_work( int matrix_order, char norm, char uplo, } /* Allocate memory for temporary array(s) */ a_t = (lapack_complex_float*) - LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) ); + LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,MAX(m,n)) ); if( a_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_0; } /* Transpose input matrices */ - LAPACKE_ctr_trans( matrix_order, uplo, diag, n, a, lda, a_t, lda_t ); + LAPACKE_ctr_trans( matrix_order, uplo, diag, MAX(m,n), a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ res = LAPACK_clantr( &norm, &uplo, &diag, &m, &n, a_t, &lda_t, work ); info = 0; /* LAPACK call is ok! 
*/ diff --git a/lapack-netlib/lapacke/src/lapacke_dlantr.c b/lapack-netlib/lapacke/src/lapacke_dlantr.c index 522122cb2..2cde1ebad 100644 --- a/lapack-netlib/lapacke/src/lapacke_dlantr.c +++ b/lapack-netlib/lapacke/src/lapacke_dlantr.c @@ -53,7 +53,7 @@ double LAPACKE_dlantr( int matrix_order, char norm, char uplo, char diag, /* Allocate memory for working array(s) */ if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, '0' ) ) { - work = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,m) ); + work = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,MAX(m,n)) ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; diff --git a/lapack-netlib/lapacke/src/lapacke_dlantr_work.c b/lapack-netlib/lapacke/src/lapacke_dlantr_work.c index 0a937bda9..44d638fa5 100644 --- a/lapack-netlib/lapacke/src/lapacke_dlantr_work.c +++ b/lapack-netlib/lapacke/src/lapacke_dlantr_work.c @@ -46,7 +46,7 @@ double LAPACKE_dlantr_work( int matrix_order, char norm, char uplo, info = info - 1; } } else if( matrix_order == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,n); + lapack_int lda_t = MAX(1,m); double* a_t = NULL; /* Check leading dimension(s) */ if( lda < n ) { @@ -55,13 +55,13 @@ double LAPACKE_dlantr_work( int matrix_order, char norm, char uplo, return info; } /* Allocate memory for temporary array(s) */ - a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) ); + a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,MAX(m,n)) ); if( a_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_0; } /* Transpose input matrices */ - LAPACKE_dtr_trans( matrix_order, uplo, diag, n, a, lda, a_t, lda_t ); + LAPACKE_dtr_trans( matrix_order, uplo, diag, MAX(m,n), a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ res = LAPACK_dlantr( &norm, &uplo, &diag, &m, &n, a_t, &lda_t, work ); info = 0; /* LAPACK call is ok! 
*/ diff --git a/lapack-netlib/lapacke/src/lapacke_slantr.c b/lapack-netlib/lapacke/src/lapacke_slantr.c index d6a512027..80313d118 100644 --- a/lapack-netlib/lapacke/src/lapacke_slantr.c +++ b/lapack-netlib/lapacke/src/lapacke_slantr.c @@ -53,7 +53,7 @@ float LAPACKE_slantr( int matrix_order, char norm, char uplo, char diag, /* Allocate memory for working array(s) */ if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, '0' ) ) { - work = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,m) ); + work = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,MAX(m,n)) ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; diff --git a/lapack-netlib/lapacke/src/lapacke_slantr_work.c b/lapack-netlib/lapacke/src/lapacke_slantr_work.c index 2389468d0..9032f7094 100644 --- a/lapack-netlib/lapacke/src/lapacke_slantr_work.c +++ b/lapack-netlib/lapacke/src/lapacke_slantr_work.c @@ -46,7 +46,7 @@ float LAPACKE_slantr_work( int matrix_order, char norm, char uplo, info = info - 1; } } else if( matrix_order == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,n); + lapack_int lda_t = MAX(1,m); float* a_t = NULL; /* Check leading dimension(s) */ if( lda < n ) { @@ -55,13 +55,13 @@ float LAPACKE_slantr_work( int matrix_order, char norm, char uplo, return info; } /* Allocate memory for temporary array(s) */ - a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,n) ); + a_t = (float*)LAPACKE_malloc( sizeof(float) * lda_t * MAX(1,MAX(m,n)) ); if( a_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_0; } /* Transpose input matrices */ - LAPACKE_str_trans( matrix_order, uplo, diag, n, a, lda, a_t, lda_t ); + LAPACKE_str_trans( matrix_order, uplo, diag, MAX(m,n), a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ res = LAPACK_slantr( &norm, &uplo, &diag, &m, &n, a_t, &lda_t, work ); info = 0; /* LAPACK call is ok! 
*/ diff --git a/lapack-netlib/lapacke/src/lapacke_zlantr.c b/lapack-netlib/lapacke/src/lapacke_zlantr.c index 887bc2eea..001ce68f6 100644 --- a/lapack-netlib/lapacke/src/lapacke_zlantr.c +++ b/lapack-netlib/lapacke/src/lapacke_zlantr.c @@ -53,7 +53,7 @@ double LAPACKE_zlantr( int matrix_order, char norm, char uplo, char diag, /* Allocate memory for working array(s) */ if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, '0' ) ) { - work = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,m) ); + work = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,MAX(m,n)) ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; diff --git a/lapack-netlib/lapacke/src/lapacke_zlantr_work.c b/lapack-netlib/lapacke/src/lapacke_zlantr_work.c index 65e741428..8700a6ee2 100644 --- a/lapack-netlib/lapacke/src/lapacke_zlantr_work.c +++ b/lapack-netlib/lapacke/src/lapacke_zlantr_work.c @@ -47,7 +47,7 @@ double LAPACKE_zlantr_work( int matrix_order, char norm, char uplo, info = info - 1; } } else if( matrix_order == LAPACK_ROW_MAJOR ) { - lapack_int lda_t = MAX(1,n); + lapack_int lda_t = MAX(1,m); lapack_complex_double* a_t = NULL; /* Check leading dimension(s) */ if( lda < n ) { @@ -57,13 +57,13 @@ double LAPACKE_zlantr_work( int matrix_order, char norm, char uplo, } /* Allocate memory for temporary array(s) */ a_t = (lapack_complex_double*) - LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,n) ); + LAPACKE_malloc( sizeof(lapack_complex_double) * lda_t * MAX(1,MAX(m,n)) ); if( a_t == NULL ) { info = LAPACK_TRANSPOSE_MEMORY_ERROR; goto exit_level_0; } /* Transpose input matrices */ - LAPACKE_ztr_trans( matrix_order, uplo, diag, n, a, lda, a_t, lda_t ); + LAPACKE_ztr_trans( matrix_order, uplo, diag, MAX(m,n), a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ res = LAPACK_zlantr( &norm, &uplo, &diag, &m, &n, a_t, &lda_t, work ); info = 0; /* LAPACK call is ok! 
*/ From 61ae47eb9926a869942267b3dc1b62a139e36ebe Mon Sep 17 00:00:00 2001 From: Yichao Yu Date: Wed, 9 Sep 2015 11:00:23 -0400 Subject: [PATCH 18/29] Ref #632. Support Intel Skylake by Haswell kernels. --- cpuid_x86.c | 20 ++++++++++++++++++++ driver/others/dynamic.c | 9 +++++++++ 2 files changed, 29 insertions(+) diff --git a/cpuid_x86.c b/cpuid_x86.c index 4f97cfb5a..828ecc328 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1147,6 +1147,16 @@ int get_cpuname(void){ return CPUTYPE_HASWELL; #else return CPUTYPE_SANDYBRIDGE; +#endif + else + return CPUTYPE_NEHALEM; + case 14: + // Skylake + if(support_avx()) +#ifndef NO_AVX2 + return CPUTYPE_HASWELL; +#else + return CPUTYPE_SANDYBRIDGE; #endif else return CPUTYPE_NEHALEM; @@ -1622,6 +1632,16 @@ int get_coretype(void){ return CORE_HASWELL; #else return CORE_SANDYBRIDGE; +#endif + else + return CORE_NEHALEM; + case 14: + // Skylake + if(support_avx()) +#ifndef NO_AVX2 + return CORE_HASWELL; +#else + return CORE_SANDYBRIDGE; #endif else return CORE_NEHALEM; diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index ff80504f9..1f70b798c 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -263,6 +263,15 @@ static gotoblas_t *get_coretype(void){ return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } } + //Intel Skylake + if (model == 14) { + if(support_avx()) + return &gotoblas_HASWELL; + else{ + openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); + return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. + } + } return NULL; } case 0xf: From cc7cab8a45d031e7e0e78147a863a632d584ed9d Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Wed, 9 Sep 2015 10:47:17 -0500 Subject: [PATCH 19/29] Detect other Intel Skylake cores. 
http://users.atw.hu/instlatx64/ --- cpuid_x86.c | 22 ++++++++++++++++++++++ driver/others/dynamic.c | 11 ++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/cpuid_x86.c b/cpuid_x86.c index 828ecc328..135ac7cf9 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1133,6 +1133,16 @@ int get_cpuname(void){ return CPUTYPE_HASWELL; #else return CPUTYPE_SANDYBRIDGE; +#endif + else + return CPUTYPE_NEHALEM; + case 14: + //Skylake + if(support_avx()) +#ifndef NO_AVX2 + return CPUTYPE_HASWELL; +#else + return CPUTYPE_SANDYBRIDGE; #endif else return CPUTYPE_NEHALEM; @@ -1150,6 +1160,7 @@ int get_cpuname(void){ #endif else return CPUTYPE_NEHALEM; + case 5: case 14: // Skylake if(support_avx()) @@ -1618,6 +1629,16 @@ int get_coretype(void){ return CORE_HASWELL; #else return CORE_SANDYBRIDGE; +#endif + else + return CORE_NEHALEM; + case 14: + //Skylake + if(support_avx()) +#ifndef NO_AVX2 + return CORE_HASWELL; +#else + return CORE_SANDYBRIDGE; #endif else return CORE_NEHALEM; @@ -1635,6 +1656,7 @@ int get_coretype(void){ #endif else return CORE_NEHALEM; + case 5: case 14: // Skylake if(support_avx()) diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index 1f70b798c..c41164559 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -252,6 +252,15 @@ static gotoblas_t *get_coretype(void){ return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. } } + //Intel Skylake + if (model == 14) { + if(support_avx()) + return &gotoblas_HASWELL; + else{ + openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); + return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels. 
+ } + } return NULL; case 5: //Intel Broadwell @@ -264,7 +273,7 @@ static gotoblas_t *get_coretype(void){ } } //Intel Skylake - if (model == 14) { + if (model == 14 || model == 5) { if(support_avx()) return &gotoblas_HASWELL; else{ From d6e8459f201ec0e95da31d9886f413f9fd10a034 Mon Sep 17 00:00:00 2001 From: kortschak Date: Thu, 10 Sep 2015 15:32:50 +0930 Subject: [PATCH 20/29] Fix LAPACK_*lansy routines Fixes #639. --- lapack-netlib/lapacke/src/lapacke_clansy.c | 4 ++-- lapack-netlib/lapacke/src/lapacke_dlansy.c | 4 ++-- lapack-netlib/lapacke/src/lapacke_slansy.c | 4 ++-- lapack-netlib/lapacke/src/lapacke_zlansy.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lapack-netlib/lapacke/src/lapacke_clansy.c b/lapack-netlib/lapacke/src/lapacke_clansy.c index 84a9d965a..eb9951145 100644 --- a/lapack-netlib/lapacke/src/lapacke_clansy.c +++ b/lapack-netlib/lapacke/src/lapacke_clansy.c @@ -51,7 +51,7 @@ float LAPACKE_clansy( int matrix_order, char norm, char uplo, lapack_int n, #endif /* Allocate memory for working array(s) */ if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) || - LAPACKE_lsame( norm, '0' ) ) { + LAPACKE_lsame( norm, 'O' ) ) { work = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,n) ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; @@ -62,7 +62,7 @@ float LAPACKE_clansy( int matrix_order, char norm, char uplo, lapack_int n, res = LAPACKE_clansy_work( matrix_order, norm, uplo, n, a, lda, work ); /* Release memory and exit */ if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) || - LAPACKE_lsame( norm, '0' ) ) { + LAPACKE_lsame( norm, 'O' ) ) { LAPACKE_free( work ); } exit_level_0: diff --git a/lapack-netlib/lapacke/src/lapacke_dlansy.c b/lapack-netlib/lapacke/src/lapacke_dlansy.c index 5e6721ef8..3d9964202 100644 --- a/lapack-netlib/lapacke/src/lapacke_dlansy.c +++ b/lapack-netlib/lapacke/src/lapacke_dlansy.c @@ -51,7 +51,7 @@ double LAPACKE_dlansy( int matrix_order, char norm, char uplo, lapack_int n, #endif 
/* Allocate memory for working array(s) */ if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) || - LAPACKE_lsame( norm, '0' ) ) { + LAPACKE_lsame( norm, 'O' ) ) { work = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,n) ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; @@ -62,7 +62,7 @@ double LAPACKE_dlansy( int matrix_order, char norm, char uplo, lapack_int n, res = LAPACKE_dlansy_work( matrix_order, norm, uplo, n, a, lda, work ); /* Release memory and exit */ if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) || - LAPACKE_lsame( norm, '0' ) ) { + LAPACKE_lsame( norm, 'O' ) ) { LAPACKE_free( work ); } exit_level_0: diff --git a/lapack-netlib/lapacke/src/lapacke_slansy.c b/lapack-netlib/lapacke/src/lapacke_slansy.c index 105ce4635..adad99b7d 100644 --- a/lapack-netlib/lapacke/src/lapacke_slansy.c +++ b/lapack-netlib/lapacke/src/lapacke_slansy.c @@ -51,7 +51,7 @@ float LAPACKE_slansy( int matrix_order, char norm, char uplo, lapack_int n, #endif /* Allocate memory for working array(s) */ if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) || - LAPACKE_lsame( norm, '0' ) ) { + LAPACKE_lsame( norm, 'O' ) ) { work = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,n) ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; @@ -62,7 +62,7 @@ float LAPACKE_slansy( int matrix_order, char norm, char uplo, lapack_int n, res = LAPACKE_slansy_work( matrix_order, norm, uplo, n, a, lda, work ); /* Release memory and exit */ if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) || - LAPACKE_lsame( norm, '0' ) ) { + LAPACKE_lsame( norm, 'O' ) ) { LAPACKE_free( work ); } exit_level_0: diff --git a/lapack-netlib/lapacke/src/lapacke_zlansy.c b/lapack-netlib/lapacke/src/lapacke_zlansy.c index 891437846..460a51a85 100644 --- a/lapack-netlib/lapacke/src/lapacke_zlansy.c +++ b/lapack-netlib/lapacke/src/lapacke_zlansy.c @@ -51,7 +51,7 @@ double LAPACKE_zlansy( int matrix_order, char norm, char uplo, lapack_int n, #endif /* Allocate memory for 
working array(s) */ if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) || - LAPACKE_lsame( norm, '0' ) ) { + LAPACKE_lsame( norm, 'O' ) ) { work = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,n) ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; @@ -62,7 +62,7 @@ double LAPACKE_zlansy( int matrix_order, char norm, char uplo, lapack_int n, res = LAPACKE_zlansy_work( matrix_order, norm, uplo, n, a, lda, work ); /* Release memory and exit */ if( LAPACKE_lsame( norm, 'i' ) || LAPACKE_lsame( norm, '1' ) || - LAPACKE_lsame( norm, '0' ) ) { + LAPACKE_lsame( norm, 'O' ) ) { LAPACKE_free( work ); } exit_level_0: From baec8f5cacfb2be6e1a73d4abfbb0eaf32d8d44a Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Thu, 10 Sep 2015 10:32:07 -0500 Subject: [PATCH 21/29] Refs #638. Fixed compiling bug with clang on Mac OS X. --- interface/zimatcopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/interface/zimatcopy.c b/interface/zimatcopy.c index 798bff585..b1e1d15dc 100644 --- a/interface/zimatcopy.c +++ b/interface/zimatcopy.c @@ -168,7 +168,7 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, IMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda ); } } - return(0); + return; } #endif From 45c8b5e7567926872fd5ef69a73f5bd5e51efa39 Mon Sep 17 00:00:00 2001 From: Lars Buitinck Date: Tue, 22 Sep 2015 12:00:30 +0200 Subject: [PATCH 22/29] actually remove cblas_noconst.h This file hasn't been used since 212463dce961827421a9c54f109a430c1599732c. 
--- cblas_noconst.h | 350 ------------------------------------------------ 1 file changed, 350 deletions(-) delete mode 100644 cblas_noconst.h diff --git a/cblas_noconst.h b/cblas_noconst.h deleted file mode 100644 index 4451c304e..000000000 --- a/cblas_noconst.h +++ /dev/null @@ -1,350 +0,0 @@ -#ifndef CBLAS_H -#define CBLAS_H - -#include -#include "common.h" - -#ifdef __cplusplus -extern "C" { - /* Assume C declarations for C++ */ -#endif /* __cplusplus */ - -/*Set the number of threads on runtime.*/ -void openblas_set_num_threads(int num_threads); -void goto_set_num_threads(int num_threads); - -/*Get the number of threads on runtime.*/ -int openblas_get_num_threads(void); - -/*Get the number of physical processors (cores).*/ -int openblas_get_num_procs(void); - -/*Get the build configure on runtime.*/ -char* openblas_get_config(void); - -/* Get the parallelization type which is used by OpenBLAS */ -int openblas_get_parallel(void); -/* OpenBLAS is compiled for sequential use */ -#define OPENBLAS_SEQUENTIAL 0 -/* OpenBLAS is compiled using normal threading model */ -#define OPENBLAS_THREAD 1 -/* OpenBLAS is compiled using OpenMP threading model */ -#define OPENBLAS_OPENMP 2 - - -#define CBLAS_INDEX size_t - -typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER; -typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE; -typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO; -typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG; -typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE; - -float cblas_sdsdot(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy); -double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy); -float cblas_sdot(blasint n, float *x, blasint incx, float *y, blasint incy); -double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy); - 
-openblas_complex_float cblas_cdotu(blasint n, float *x, blasint incx, float *y, blasint incy); -openblas_complex_float cblas_cdotc(blasint n, float *x, blasint incx, float *y, blasint incy); -openblas_complex_double cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy); -openblas_complex_double cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy); - -void cblas_cdotu_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret); -void cblas_cdotc_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret); -void cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret); -void cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret); - -float cblas_sasum (blasint n, float *x, blasint incx); -double cblas_dasum (blasint n, double *x, blasint incx); -float cblas_scasum(blasint n, float *x, blasint incx); -double cblas_dzasum(blasint n, double *x, blasint incx); - -float cblas_snrm2 (blasint N, float *X, blasint incX); -double cblas_dnrm2 (blasint N, double *X, blasint incX); -float cblas_scnrm2(blasint N, float *X, blasint incX); -double cblas_dznrm2(blasint N, double *X, blasint incX); - -CBLAS_INDEX cblas_isamax(blasint n, float *x, blasint incx); -CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx); -CBLAS_INDEX cblas_icamax(blasint n, float *x, blasint incx); -CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx); - -void cblas_saxpy(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy); -void cblas_daxpy(blasint n, double alpha, double *x, blasint incx, double *y, blasint incy); -void cblas_caxpy(blasint n, float *alpha, float *x, blasint incx, float *y, blasint incy); -void cblas_zaxpy(blasint n, double *alpha, double *x, blasint incx, double *y, blasint incy); - -void cblas_scopy(blasint n, float *x, blasint incx, float *y, 
blasint incy); -void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy); -void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy); -void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy); - -void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy); -void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy); -void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy); -void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy); - -void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s); -void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double s); - -void cblas_srotg(float *a, float *b, float *c, float *s); -void cblas_drotg(double *a, double *b, double *c, double *s); - -void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P); -void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P); - -void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P); -void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P); - -void cblas_sscal(blasint N, float alpha, float *X, blasint incX); -void cblas_dscal(blasint N, double alpha, double *X, blasint incX); -void cblas_cscal(blasint N, float *alpha, float *X, blasint incX); -void cblas_zscal(blasint N, double *alpha, double *X, blasint incX); -void cblas_csscal(blasint N, float alpha, float *X, blasint incX); -void cblas_zdscal(blasint N, double alpha, double *X, blasint incX); - -void cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n, - float alpha, float *a, blasint lda, float *x, blasint incx, float beta, float *y, blasint incy); -void cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n, - double alpha, double *a, blasint lda, double *x, blasint incx, double 
beta, double *y, blasint incy); -void cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n, - float *alpha, float *a, blasint lda, float *x, blasint incx, float *beta, float *y, blasint incy); -void cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n, - double *alpha, double *a, blasint lda, double *x, blasint incx, double *beta, double *y, blasint incy); - -void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda); -void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda); -void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda); -void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda); -void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda); -void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda); - -void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX); -void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX); -void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX); -void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, 
blasint lda, double *X, blasint incX); - -void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX); -void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX); -void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX); -void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX); - -void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda); -void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda); -void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda); -void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda); - -void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X, - blasint incX, float *Y, blasint incY, float *A, blasint lda); -void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, - blasint incX, double *Y, blasint incY, double *A, blasint lda); -void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, - float *Y, blasint incY, float *A, blasint lda); -void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, - double *Y, blasint incY, double *A, blasint lda); - -void cblas_sgbmv(enum CBLAS_ORDER order, enum 
CBLAS_TRANSPOSE TransA, blasint M, blasint N, - blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY); -void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N, - blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY); -void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N, - blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY); -void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N, - blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY); - -void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A, - blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY); -void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A, - blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY); - - -void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, blasint K, float *A, blasint lda, float *X, blasint incX); -void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, blasint K, double *A, blasint lda, double *X, blasint incX); -void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, blasint K, float *A, blasint lda, float *X, blasint incX); -void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, blasint K, double *A, blasint lda, double *X, blasint incX); - -void cblas_stbsv(enum 
CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, blasint K, float *A, blasint lda, float *X, blasint incX); -void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, blasint K, double *A, blasint lda, double *X, blasint incX); -void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, blasint K, float *A, blasint lda, float *X, blasint incX); -void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, blasint K, double *A, blasint lda, double *X, blasint incX); - -void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, float *Ap, float *X, blasint incX); -void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, double *Ap, double *X, blasint incX); -void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, float *Ap, float *X, blasint incX); -void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, double *Ap, double *X, blasint incX); - -void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, float *Ap, float *X, blasint incX); -void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, double *Ap, double *X, blasint incX); -void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, - blasint N, float *Ap, float *X, blasint incX); -void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG 
Diag, - blasint N, double *Ap, double *X, blasint incX); - -void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A, - blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY); -void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A, - blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY); -void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A, - blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY); -void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A, - blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY); - - -void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap, - float *X, blasint incX, float beta, float *Y, blasint incY); -void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap, - double *X, blasint incX, double beta, double *Y, blasint incY); - -void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap); -void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap); - -void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A); -void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A); - -void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A); -void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A); -void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, 
blasint incX, float *Y, blasint incY, float *Ap); -void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap); - -void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, - float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY); -void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, - double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY); - -void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, - float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY); -void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, - double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY); - -void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, - float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc); -void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, - double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc); -void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, - float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc); -void cblas_cgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, - float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc); -void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, - double 
*alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc); -void cblas_zgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, - double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc); - -void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N, - float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc); -void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N, - double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc); -void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N, - float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc); -void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N, - double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc); - -void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, - blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc); -void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, - blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc); -void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, - blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc); -void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, - blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc); - -void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO 
Uplo, enum CBLAS_TRANSPOSE Trans, - blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc); -void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, - blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc); -void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, - blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc); -void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, - blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc); - -void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, - enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb); -void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, - enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb); -void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, - enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb); -void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, - enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb); - -void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, - enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb); -void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO 
Uplo, enum CBLAS_TRANSPOSE TransA, - enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb); -void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, - enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb); -void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, - enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb); - -void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N, - float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc); -void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N, - double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc); - -void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K, - float alpha, float *A, blasint lda, float beta, float *C, blasint ldc); -void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K, - double alpha, double *A, blasint lda, double beta, double *C, blasint ldc); - -void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K, - float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc); -void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K, - double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc); - -void cblas_xerbla(blasint p, char *rout, char *form, ...); - -/*** BLAS extensions ***/ - -void cblas_saxpby(blasint n, float alpha, float *x, blasint incx,float beta, 
float *y, blasint incy); - -void cblas_daxpby(blasint n, double alpha, double *x, blasint incx,double beta, double *y, blasint incy); - -void cblas_caxpby(blasint n, float *alpha, float *x, blasint incx,float *beta, float *y, blasint incy); - -void cblas_zaxpby(blasint n, double *alpha, double *x, blasint incx,double *beta, double *y, blasint incy); - -void cblas_somatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a, - blasint clda, float *b, blasint cldb); -void cblas_domatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a, - blasint clda, double *b, blasint cldb); -void cblas_comatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a, - blasint clda, void *b, blasint cldb); -void cblas_zomatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a, - blasint clda, void *b, blasint cldb); - -void cblas_simatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a, - blasint clda, blasint cldb); -void cblas_dimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a, - blasint clda, blasint cldb); -void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float* calpha, float* a, - blasint clda, blasint cldb); -void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a, - blasint clda, blasint cldb); - -void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta, - float *c, blasint cldc); -void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta, - double *c, 
blasint cldc); -void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta, - float *c, blasint cldc); -void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta, - double *c, blasint cldc); - -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#endif From b9534bbd76fd09c63432f4a05c46de4c8d563614 Mon Sep 17 00:00:00 2001 From: Lars Buitinck Date: Tue, 22 Sep 2015 12:01:09 +0200 Subject: [PATCH 23/29] git ignore versioned .so files --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 7422cead3..8df228993 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ lapack-netlib/make.inc lapack-netlib/lapacke/include/lapacke_mangling.h lapack-netlib/TESTING/testing_results.txt *.so +*.so.* *.a .svn *~ From f27942a68aced9933761c5d608dfb45e8fd10e8a Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Sat, 26 Sep 2015 14:42:44 +0000 Subject: [PATCH 24/29] Fixed make TARGET=CORTEXA9 and CORTEXA15 bug. --- getarch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/getarch.c b/getarch.c index 89e736a31..c05b90410 100644 --- a/getarch.c +++ b/getarch.c @@ -750,7 +750,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ARCHITECTURE "ARM" #define SUBARCHITECTURE "CORTEXA9" #define SUBDIRNAME "arm" -#define ARCHCONFIG "-DCORTEXA9 " \ +#define ARCHCONFIG "-DCORTEXA9 -DARMV7 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ @@ -765,7 +765,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define ARCHITECTURE "ARM" #define SUBARCHITECTURE "CORTEXA15" #define SUBDIRNAME "arm" -#define ARCHCONFIG "-DCORTEXA15 " \ +#define ARCHCONFIG "-DCORTEXA15 -DARMV7 " \ "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \ "-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \ "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ From 88bef3bffcfd1715bb26cc410a7d60163dcfd4a1 Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Thu, 1 Oct 2015 15:07:04 -0400 Subject: [PATCH 25/29] default to lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX), as discussed in #646: if you rename the symbols, it is best to rename the library --- Makefile.rule | 3 +++ Makefile.system | 12 ++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Makefile.rule b/Makefile.rule index 19f3fe3d9..22f222e3f 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -169,6 +169,9 @@ COMMON_PROF = -pg # 64 bit integer interfaces in OpenBLAS. # For details, https://github.com/xianyi/OpenBLAS/pull/459 # +# The same prefix and suffix are also added to the library name, +# i.e. 
you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas +# # SYMBOLPREFIX= # SYMBOLSUFFIX= diff --git a/Makefile.system b/Makefile.system index 325ee6af9..42ad49849 100644 --- a/Makefile.system +++ b/Makefile.system @@ -880,12 +880,6 @@ ifdef USE_SIMPLE_THREADED_LEVEL3 CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3 endif -ifndef LIBNAMESUFFIX -LIBPREFIX = libopenblas -else -LIBPREFIX = libopenblas_$(LIBNAMESUFFIX) -endif - ifndef SYMBOLPREFIX SYMBOLPREFIX = endif @@ -894,6 +888,12 @@ ifndef SYMBOLSUFFIX SYMBOLSUFFIX = endif +ifndef LIBNAMESUFFIX +LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) +else +LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX) +endif + KERNELDIR = $(TOPDIR)/kernel/$(ARCH) include $(TOPDIR)/Makefile.$(ARCH) From e9493f69ebc706a974b1650cded21b70115668de Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Mon, 5 Oct 2015 00:58:07 -0400 Subject: [PATCH 26/29] Fix cross compilation suffix detection If the path involves `-`, this would have otherwise detected this as a cross compile suffix. --- c_check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_check b/c_check index 0fdadb659..d694e7411 100644 --- a/c_check +++ b/c_check @@ -30,7 +30,7 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) { $cross_suffix = $1; } } else { - if ($ARGV[0] =~ /(.*-)(.*)/) { + if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) { $cross_suffix = $1; } } From 11ac4665c835a27a097e5021074cbf366bcb9765 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 5 Oct 2015 14:14:32 -0500 Subject: [PATCH 27/29] Fixed #654. Make sure the gotoblas_init function is run before all other static initializations. --- driver/others/memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index 49c57f911..f75a47d65 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -139,8 +139,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define BITMASK(a, b, c) ((((a) >> (b)) & (c))) -#define CONSTRUCTOR __attribute__ ((constructor)) -#define DESTRUCTOR __attribute__ ((destructor)) +#define CONSTRUCTOR __attribute__ ((constructor(101))) +#define DESTRUCTOR __attribute__ ((destructor(101))) #ifdef DYNAMIC_ARCH gotoblas_t *gotoblas = NULL; From 90aa8e24b94ce8bbf73e60f9c69c50a2b18565da Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Wed, 7 Oct 2015 02:31:51 +0800 Subject: [PATCH 28/29] Refs #615. Import bug fixes for LAPACKE dormlq. --- lapack-netlib/lapacke/src/lapacke_cunmlq_work.c | 16 +++++++++------- lapack-netlib/lapacke/src/lapacke_dormlq_work.c | 16 +++++++++------- lapack-netlib/lapacke/src/lapacke_sormlq_work.c | 16 +++++++++------- lapack-netlib/lapacke/src/lapacke_zunmlq_work.c | 16 +++++++++------- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/lapack-netlib/lapacke/src/lapacke_cunmlq_work.c b/lapack-netlib/lapacke/src/lapacke_cunmlq_work.c index 1cd20e1ca..5cf66424d 100644 --- a/lapack-netlib/lapacke/src/lapacke_cunmlq_work.c +++ b/lapack-netlib/lapacke/src/lapacke_cunmlq_work.c @@ -1,5 +1,5 @@ /***************************************************************************** - Copyright (c) 2011, Intel Corp. + Copyright (c) 2014, Intel Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #include "lapacke_utils.h" -lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, +lapack_int LAPACKE_cunmlq_work( int matrix_layout, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_float* a, lapack_int lda, const lapack_complex_float* tau, @@ -41,20 +41,22 @@ lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, lapack_complex_float* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_order == LAPACK_COL_MAJOR ) { + lapack_int r; + if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_cunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_order == LAPACK_ROW_MAJOR ) { + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + r = LAPACKE_lsame( side, 'l' ) ? m : n; lapack_int lda_t = MAX(1,k); lapack_int ldc_t = MAX(1,m); lapack_complex_float* a_t = NULL; lapack_complex_float* c_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < r ) { info = -8; LAPACKE_xerbla( "LAPACKE_cunmlq_work", info ); return info; @@ -84,8 +86,8 @@ lapack_int LAPACKE_cunmlq_work( int matrix_order, char side, char trans, goto exit_level_1; } /* Transpose input matrices */ - LAPACKE_cge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); - LAPACKE_cge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); + LAPACKE_cge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); + LAPACKE_cge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ LAPACK_cunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, work, &lwork, &info ); diff --git a/lapack-netlib/lapacke/src/lapacke_dormlq_work.c b/lapack-netlib/lapacke/src/lapacke_dormlq_work.c index 9a7a997fe..99a7c3c71 100644 --- a/lapack-netlib/lapacke/src/lapacke_dormlq_work.c +++ 
b/lapack-netlib/lapacke/src/lapacke_dormlq_work.c @@ -1,5 +1,5 @@ /***************************************************************************** - Copyright (c) 2011, Intel Corp. + Copyright (c) 2014, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,27 +33,29 @@ #include "lapacke_utils.h" -lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans, +lapack_int LAPACKE_dormlq_work( int matrix_layout, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const double* a, lapack_int lda, const double* tau, double* c, lapack_int ldc, double* work, lapack_int lwork ) { lapack_int info = 0; + lapack_int r; lapack_int lda_t, ldc_t; double *a_t = NULL, *c_t = NULL; - if( matrix_order == LAPACK_COL_MAJOR ) { + if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_dormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_order == LAPACK_ROW_MAJOR ) { + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + r = LAPACKE_lsame( side, 'l' ) ? 
m : n; lda_t = MAX(1,k); ldc_t = MAX(1,m); /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < r ) { info = -8; LAPACKE_xerbla( "LAPACKE_dormlq_work", info ); return info; @@ -81,8 +83,8 @@ lapack_int LAPACKE_dormlq_work( int matrix_order, char side, char trans, goto exit_level_1; } /* Transpose input matrices */ - LAPACKE_dge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); - LAPACKE_dge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); + LAPACKE_dge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); + LAPACKE_dge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ LAPACK_dormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, work, &lwork, &info ); diff --git a/lapack-netlib/lapacke/src/lapacke_sormlq_work.c b/lapack-netlib/lapacke/src/lapacke_sormlq_work.c index 7a7464d18..bbf55bd84 100644 --- a/lapack-netlib/lapacke/src/lapacke_sormlq_work.c +++ b/lapack-netlib/lapacke/src/lapacke_sormlq_work.c @@ -1,5 +1,5 @@ /***************************************************************************** - Copyright (c) 2011, Intel Corp. + Copyright (c) 2014, Intel Corp. All rights reserved. 
Redistribution and use in source and binary forms, with or without @@ -33,27 +33,29 @@ #include "lapacke_utils.h" -lapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans, +lapack_int LAPACKE_sormlq_work( int matrix_layout, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const float* a, lapack_int lda, const float* tau, float* c, lapack_int ldc, float* work, lapack_int lwork ) { lapack_int info = 0; + lapack_int r; lapack_int lda_t, ldc_t; float *a_t = NULL, *c_t = NULL; - if( matrix_order == LAPACK_COL_MAJOR ) { + if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_sormlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_order == LAPACK_ROW_MAJOR ) { + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + r = LAPACKE_lsame( side, 'l' ) ? m : n; lda_t = MAX(1,k); ldc_t = MAX(1,m); /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < r ) { info = -8; LAPACKE_xerbla( "LAPACKE_sormlq_work", info ); return info; @@ -81,8 +83,8 @@ lapack_int LAPACKE_sormlq_work( int matrix_order, char side, char trans, goto exit_level_1; } /* Transpose input matrices */ - LAPACKE_sge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); - LAPACKE_sge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); + LAPACKE_sge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); + LAPACKE_sge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ LAPACK_sormlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, work, &lwork, &info ); diff --git a/lapack-netlib/lapacke/src/lapacke_zunmlq_work.c b/lapack-netlib/lapacke/src/lapacke_zunmlq_work.c index 8677ac0bc..38a2d947a 100644 --- a/lapack-netlib/lapacke/src/lapacke_zunmlq_work.c +++ b/lapack-netlib/lapacke/src/lapacke_zunmlq_work.c @@ -1,5 +1,5 @@ /***************************************************************************** - Copyright (c) 2011, Intel Corp. 
+ Copyright (c) 2014, Intel Corp. All rights reserved. Redistribution and use in source and binary forms, with or without @@ -33,7 +33,7 @@ #include "lapacke_utils.h" -lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, +lapack_int LAPACKE_zunmlq_work( int matrix_layout, char side, char trans, lapack_int m, lapack_int n, lapack_int k, const lapack_complex_double* a, lapack_int lda, const lapack_complex_double* tau, @@ -41,20 +41,22 @@ lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, lapack_complex_double* work, lapack_int lwork ) { lapack_int info = 0; - if( matrix_order == LAPACK_COL_MAJOR ) { + lapack_int r; + if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ LAPACK_zunmlq( &side, &trans, &m, &n, &k, a, &lda, tau, c, &ldc, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } - } else if( matrix_order == LAPACK_ROW_MAJOR ) { + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + r = LAPACKE_lsame( side, 'l' ) ? m : n; lapack_int lda_t = MAX(1,k); lapack_int ldc_t = MAX(1,m); lapack_complex_double* a_t = NULL; lapack_complex_double* c_t = NULL; /* Check leading dimension(s) */ - if( lda < m ) { + if( lda < r ) { info = -8; LAPACKE_xerbla( "LAPACKE_zunmlq_work", info ); return info; @@ -84,8 +86,8 @@ lapack_int LAPACKE_zunmlq_work( int matrix_order, char side, char trans, goto exit_level_1; } /* Transpose input matrices */ - LAPACKE_zge_trans( matrix_order, k, m, a, lda, a_t, lda_t ); - LAPACKE_zge_trans( matrix_order, m, n, c, ldc, c_t, ldc_t ); + LAPACKE_zge_trans( matrix_layout, k, m, a, lda, a_t, lda_t ); + LAPACKE_zge_trans( matrix_layout, m, n, c, ldc, c_t, ldc_t ); /* Call LAPACK function and adjust info */ LAPACK_zunmlq( &side, &trans, &m, &n, &k, a_t, &lda_t, tau, c_t, &ldc_t, work, &lwork, &info ); From 3684706a121f9d9e1ccfc4a2bbb98f698eb04514 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Thu, 8 Oct 2015 15:07:24 +0000 Subject: [PATCH 29/29] Include time.h. 
--- common.h | 1 + 1 file changed, 1 insertion(+) diff --git a/common.h b/common.h index c367e38cb..c7660a7dd 100644 --- a/common.h +++ b/common.h @@ -114,6 +114,7 @@ extern "C" { #include #endif #include +#include <time.h> #include #include #ifdef SMP