From 4c22828812a9d5f0962c836d4c8bf486fde4d9cb Mon Sep 17 00:00:00 2001 From: AbdelRauf Date: Fri, 30 Aug 2019 04:09:15 +0000 Subject: [PATCH] caxpy and cdot are using vec_vsx_ld --- kernel/power/caxpy.c | 67 ++++++++++++++++++++++++++------------------ kernel/power/cdot.c | 48 +++++++++++++++++-------------- 2 files changed, 67 insertions(+), 48 deletions(-) diff --git a/kernel/power/caxpy.c b/kernel/power/caxpy.c index 00f2ec5e0..0545766b1 100644 --- a/kernel/power/caxpy.c +++ b/kernel/power/caxpy.c @@ -27,6 +27,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" #ifndef HAVE_ASM_KERNEL #include + +#define offset_0 0 +#define offset_1 16 +#define offset_2 32 +#define offset_3 48 +#define offset_4 64 +#define offset_5 80 +#define offset_6 96 +#define offset_7 112 + static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11}; static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) @@ -43,27 +53,28 @@ static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT #endif __vector unsigned char swap_mask = *((__vector unsigned char*)swap_mask_arr); - register __vector float *vy = (__vector float *) y; - register __vector float *vx = (__vector float *) x; + register __vector float *vptr_y = (__vector float *) y; + register __vector float *vptr_x = (__vector float *) x; BLASLONG i=0; - for (; i < n/2; i += 8) { + for(;i + +#define offset_0 0 +#define offset_1 16 +#define offset_2 32 +#define offset_3 48 + + + static const unsigned char __attribute__((aligned(16))) swap_mask_arr[]={ 4,5,6,7,0,1,2,3, 12,13,14,15, 8,9,10,11}; static void cdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, float *dot) { __vector unsigned char swap_mask = *((__vector unsigned char*)swap_mask_arr); - register __vector float *vy = (__vector float *) y; - register __vector float *vx = (__vector float *) x; - BLASLONG i = 0; + register __vector float *vptr_y = (__vector float *) y; + register __vector float *vptr_x = (__vector float *) x; register __vector float vd_0 = { 0 }; register __vector float vd_1 = { 0 }; register __vector float vd_2 = { 0 }; @@ -41,26 +48,23 @@ static void cdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, float *dot) register __vector float vdd_0 = { 0 }; register __vector float vdd_1 = { 0 }; register __vector float vdd_2 = { 0 }; - register __vector float vdd_3 = { 0 }; - for (; i < n/2; i += 4) { + register __vector float vdd_3 = { 0 }; + BLASLONG i=0; + for(;i