diff --git a/kernel/power/KERNEL.POWER5 b/kernel/power/KERNEL.POWER5 index fbef79e59..bea7b17c8 100644 --- a/kernel/power/KERNEL.POWER5 +++ b/kernel/power/KERNEL.POWER5 @@ -54,3 +54,8 @@ ZTRSMKERNEL_LN = ztrsm_kernel_LN.S ZTRSMKERNEL_LT = ztrsm_kernel_LT.S ZTRSMKERNEL_RN = ztrsm_kernel_LT.S ZTRSMKERNEL_RT = ztrsm_kernel_RT.S + +CROTKERNEL = ../arm/zrot.c +ZROTKERNEL = ../arm/zrot.c +SGEMVNKERNEL = ../arm/gemv_n.c +SGEMVTKERNEL = ../arm/gemv_t.c diff --git a/kernel/power/KERNEL.PPC440 b/kernel/power/KERNEL.PPC440 index 677af5f21..fd9a8c780 100644 --- a/kernel/power/KERNEL.PPC440 +++ b/kernel/power/KERNEL.PPC440 @@ -16,11 +16,11 @@ ZASUMKERNEL = zasum_ppc440.S SAXPYKERNEL = axpy_ppc440.S DAXPYKERNEL = axpy_ppc440.S ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__) -CAXPYKERNEL = ../arm/zaxpy.c -ZAXPYKERNEL = ../arm/zaxpy.c -else CAXPYKERNEL = zaxpy_ppc440.S ZAXPYKERNEL = zaxpy_ppc440.S +else +CAXPYKERNEL = ../arm/zaxpy.c +ZAXPYKERNEL = ../arm/zaxpy.c endif SDOTKERNEL = dot_ppc440.S diff --git a/kernel/power/KERNEL.PPCG4 b/kernel/power/KERNEL.PPCG4 index 54660b54d..1bdd3119e 100644 --- a/kernel/power/KERNEL.PPCG4 +++ b/kernel/power/KERNEL.PPCG4 @@ -15,8 +15,13 @@ ZASUMKERNEL = zasum_ppc440.S SAXPYKERNEL = axpy_ppc440.S DAXPYKERNEL = axpy_ppc440.S +ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__) CAXPYKERNEL = zaxpy_ppc440.S ZAXPYKERNEL = zaxpy_ppc440.S +else +CAXPYKERNEL = ../arm/zaxpy.c +ZAXPYKERNEL = ../arm/zaxpy.c +endif SDOTKERNEL = dot_ppc440.S DDOTKERNEL = dot_ppc440.S diff --git a/kernel/power/gemv_n.S b/kernel/power/gemv_n.S index abc61b62e..9c6f87639 100644 --- a/kernel/power/gemv_n.S +++ b/kernel/power/gemv_n.S @@ -159,6 +159,11 @@ #define PREFETCHSIZE_C 16 #endif +#ifdef POWER3 +#define PREFETCHSIZE_A 16 +#define PREFETCHSIZE_C 16 +#endif + #ifdef POWER4 #define PREFETCHSIZE_A 16 #define PREFETCHSIZE_C 16 diff --git a/kernel/power/gemv_t.S b/kernel/power/gemv_t.S index 25a4dd01b..accdad702 100644 --- a/kernel/power/gemv_t.S +++ b/kernel/power/gemv_t.S @@ -124,6 +124,11 @@ #define PREFETCHSIZE_C 16 #endif +#ifdef POWER3 +#define PREFETCHSIZE_A 16 +#define PREFETCHSIZE_C 16 +#endif + #ifdef POWER4 #define PREFETCHSIZE_A 48 #define PREFETCHSIZE_C 16 diff --git a/kernel/power/zgemv_n.S b/kernel/power/zgemv_n.S index 708f1318d..48f49f97b 100644 --- a/kernel/power/zgemv_n.S +++ b/kernel/power/zgemv_n.S @@ -155,6 +155,11 @@ #define PREFETCHSIZE_C 16 #endif +#ifdef POWER3 +#define PREFETCHSIZE_A 34 +#define PREFETCHSIZE_C 16 +#endif + #ifdef POWER4 #define PREFETCHSIZE_A 34 #define PREFETCHSIZE_C 16 diff --git a/kernel/power/zgemv_t.S b/kernel/power/zgemv_t.S index d82fab16a..314cf5e6e 100644 --- a/kernel/power/zgemv_t.S +++ b/kernel/power/zgemv_t.S @@ -129,6 +129,11 @@ #define PREFETCHSIZE_C 16 #endif +#ifdef POWER3 +#define PREFETCHSIZE_A 34 +#define PREFETCHSIZE_C 16 +#endif + #ifdef POWER4 #define PREFETCHSIZE_A 34 #define PREFETCHSIZE_C 16 diff --git a/param.h b/param.h index a35ce69bd..ddad2fb36 100644 --- a/param.h +++ b/param.h @@ -72,13 +72,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef PARAM_H #define PARAM_H -#define LONGCAST (BLASLONG) -#if defined(__BYTE_ORDER__) -#if __GNUC__ < 9 -#undef LONGCAST -#define LONGCAST -#endif -#endif #define SBGEMM_DEFAULT_UNROLL_N 4 #define SBGEMM_DEFAULT_UNROLL_M 8 @@ -2096,7 +2089,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef PPCG4 #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 1024 -#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL +#define GEMM_DEFAULT_ALIGN 0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2127,7 +2120,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 2688 #define GEMM_DEFAULT_OFFSET_B 3072 -#define GEMM_DEFAULT_ALIGN LONGCAST 0x03fffUL +#define GEMM_DEFAULT_ALIGN 0x03fffUL #if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) #define SGEMM_DEFAULT_UNROLL_M 4 @@ -2176,7 +2169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A (32 * 0) #define GEMM_DEFAULT_OFFSET_B (32 * 0) -#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL +#define GEMM_DEFAULT_ALIGN 0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2212,7 +2205,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A (32 * 0) #define GEMM_DEFAULT_OFFSET_B (32 * 0) -#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL +#define GEMM_DEFAULT_ALIGN 0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 8 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2247,7 +2240,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(POWER3) || defined(POWER4) || defined(POWER5) #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 2048 -#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL +#define GEMM_DEFAULT_ALIGN 0x0ffffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2271,6 +2264,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define DGEMM_DEFAULT_Q 216 #define DGEMM_DEFAULT_R 1012 +#define CGEMM_DEFAULT_P 256 +#define CGEMM_DEFAULT_Q 104 +#define CGEMM_DEFAULT_R 1012 + #define ZGEMM_DEFAULT_P 256 #define ZGEMM_DEFAULT_Q 104 #define ZGEMM_DEFAULT_R 1012 @@ -2288,6 +2285,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CGEMM_DEFAULT_P 144 #define ZGEMM_DEFAULT_P 144 #endif + +#define SGEMM_DEFAULT_Q 256 +#define CGEMM_DEFAULT_Q 256 +#define DGEMM_DEFAULT_Q 256 +#define ZGEMM_DEFAULT_Q 256 #endif #if defined(POWER5) @@ -2320,7 +2322,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 384 #define GEMM_DEFAULT_OFFSET_B 1024 -#define GEMM_DEFAULT_ALIGN LONGCAST 0x03fffUL +#define GEMM_DEFAULT_ALIGN 0x03fffUL #define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_N 4 @@ -2353,7 +2355,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 65536 -#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL +#define GEMM_DEFAULT_ALIGN 0x0ffffUL #if defined(__32BIT__) #warning using BINARY32==POWER6 #define SGEMM_DEFAULT_UNROLL_M 4 @@ -2406,7 +2408,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 65536 -#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL +#define GEMM_DEFAULT_ALIGN 0x0ffffUL #define SWITCH_RATIO 16 #define GEMM_PREFERED_SIZE 16 @@ -2445,7 +2447,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 65536 -#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL +#define GEMM_DEFAULT_ALIGN 0x0ffffUL #define SWITCH_RATIO 16 #define GEMM_PREFERED_SIZE 16