From 971d395a5d134098efca3f18f8324198dee3e4a0 Mon Sep 17 00:00:00 2001 From: garadeaniket <129481361+garadeaniket@users.noreply.github.com> Date: Thu, 3 Oct 2024 17:36:45 +0530 Subject: [PATCH] NEW SVE BLAS FILES ADDED SVE implementation of gemv , scal , swap and rot BLAS routines files has been added --- kernel/arm64/dgemv_kernel_sve.c | 41 +++++++++++++++++++++++++++++++++ kernel/arm64/gemv_n.c | 32 +++++++++++++++++++++++++ kernel/arm64/rot.c | 19 +++++++++++++++ kernel/arm64/rot_kernel_c.c | 25 ++++++++++++++++++++ kernel/arm64/rot_kernel_sve.c | 38 ++++++++++++++++++++++++++++++ kernel/arm64/scal.c | 20 ++++++++++++++++ kernel/arm64/scal_kernel_c.c | 23 ++++++++++++++++++ kernel/arm64/scal_kernel_sve.c | 28 ++++++++++++++++++++++ kernel/arm64/swap.c | 22 ++++++++++++++++++ kernel/arm64/swap_kernel_sve.c | 37 +++++++++++++++++++++++++++++ 10 files changed, 285 insertions(+) create mode 100644 kernel/arm64/dgemv_kernel_sve.c create mode 100644 kernel/arm64/gemv_n.c create mode 100644 kernel/arm64/rot.c create mode 100644 kernel/arm64/rot_kernel_c.c create mode 100644 kernel/arm64/rot_kernel_sve.c create mode 100644 kernel/arm64/scal.c create mode 100644 kernel/arm64/scal_kernel_c.c create mode 100644 kernel/arm64/scal_kernel_sve.c create mode 100644 kernel/arm64/swap.c create mode 100644 kernel/arm64/swap_kernel_sve.c diff --git a/kernel/arm64/dgemv_kernel_sve.c b/kernel/arm64/dgemv_kernel_sve.c new file mode 100644 index 000000000..759dcb15e --- /dev/null +++ b/kernel/arm64/dgemv_kernel_sve.c @@ -0,0 +1,41 @@ +#include "common.h" + +#include + +#ifdef DOUBLE +#define SVE_TYPE svfloat64_t +#define SVE_ZERO svdup_f64(0.0) +#define SVE_WHILELT svwhilelt_b64 +#define SVE_ALL svptrue_b64() +#define SVE_WIDTH svcntd() +#else +#define SVE_TYPE svfloat32_t +#define SVE_ZERO svdup_f32(0.0) +#define SVE_WHILELT svwhilelt_b32 +#define SVE_ALL svptrue_b32() +#define SVE_WIDTH svcntw() +#endif + +static FLOAT dgemv_kernel_sve(BLASLONG i, FLOAT *x, BLASLONG lda, FLOAT *y, BLASLONG incx, BLASLONG n){ + SVE_TYPE acc_a = SVE_ZERO; + SVE_TYPE acc_b = SVE_ZERO; + + BLASLONG sve_width = SVE_WIDTH; + + for (BLASLONG j = 0; j < n; j += sve_width * 2) { + svbool_t pg_a = SVE_WHILELT(j, n); + svbool_t pg_b = SVE_WHILELT(j + sve_width, n); + + SVE_TYPE x_vec_a = svld1(pg_a, &x[i*lda+j]); + SVE_TYPE y_vec_a = svld1(pg_a, &y[j*incx]); + SVE_TYPE x_vec_b = svld1(pg_b, &x[i*lda+j + sve_width]); + SVE_TYPE y_vec_b = svld1(pg_b, &y[j*incx + sve_width]); + + acc_a = svmla_m(pg_a, acc_a, x_vec_a, y_vec_a); + acc_b = svmla_m(pg_b, acc_b, x_vec_b, y_vec_b); + } + + return svaddv(SVE_ALL, acc_a) + svaddv(SVE_ALL, acc_b); + +} + diff --git a/kernel/arm64/gemv_n.c b/kernel/arm64/gemv_n.c new file mode 100644 index 000000000..0eb2a6fe7 --- /dev/null +++ b/kernel/arm64/gemv_n.c @@ -0,0 +1,32 @@ + +#include "common.h" + +// Some compilers will report feature support for SVE without the appropriate +// header available +#ifdef HAVE_SVE +#if defined __has_include +#if __has_include() && __ARM_FEATURE_SVE +#define USE_SVE +#endif +#endif +#endif + +#include "dgemv_kernel_sve.c" +#include "dgemv_kernel_c.c" + + +int CNAME(BLASLONG m, BLASLONG n , BLASLONG dummy, FLOAT alpha, FLOAT* a, BLASLONG lda , FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){ + + if ( incx == 1 && incy == 1){ + // if(alpha!=1) for(BLASLONG i=0; i + +#ifdef DOUBLE +#define SVE_TYPE svfloat64_t +#define SVE_ZERO svdup_f64(0.0) +#define SVE_WHILELT svwhilelt_b64 +#define SVE_ALL svptrue_b64() +#define SVE_WIDTH svcntd() +#else +#define SVE_TYPE svfloat32_t +#define SVE_ZERO svdup_f32(0.0) +#define SVE_WHILELT svwhilelt_b32 +#define SVE_ALL svptrue_b32() +#define SVE_WIDTH svcntw() +#endif + +static void rot_kernel_sve(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s){ + + for(int i=0; i + +#ifdef DOUBLE +#define SVE_TYPE svfloat64_t +#define SVE_ZERO svdup_f64(0.0) +#define SVE_WHILELT svwhilelt_b64 +#define SVE_ALL svptrue_b64() +#define SVE_WIDTH svcntd() +#else +#define SVE_TYPE svfloat32_t +#define SVE_ZERO svdup_f32(0.0) +#define SVE_WHILELT svwhilelt_b32 +#define SVE_ALL svptrue_b32() +#define SVE_WIDTH svcntw() +#endif +static int scal_kernel_sve(int n, FLOAT *x, FLOAT da) +{ + for (int i = 0; i < n; i += SVE_WIDTH){ + svbool_t pg = SVE_WHILELT(i, n); + SVE_TYPE x_vec = svld1(pg, &x[i]); + SVE_TYPE result= svmul_z(pg,x_vec,da); + svst1(pg,&x[i],result); + } + return (0); +} + diff --git a/kernel/arm64/swap.c b/kernel/arm64/swap.c new file mode 100644 index 000000000..30bf454b0 --- /dev/null +++ b/kernel/arm64/swap.c @@ -0,0 +1,22 @@ +#include "common.h" + + +#ifdef HAVE_SVE +#if defined __has_include +#if __has_include() && __ARM_FEATURE_SVE +#define USE_SVE +#endif +#endif +#endif + +#include "swap_kernel_sve.c" + +//(BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG) +//int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) +int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT* dummy, BLASLONG dummy2) +{ + swap_kernel_sve(n, x,inc_x, y, inc_y); + return 0; + +} + diff --git a/kernel/arm64/swap_kernel_sve.c b/kernel/arm64/swap_kernel_sve.c new file mode 100644 index 000000000..e09f2450c --- /dev/null +++ b/kernel/arm64/swap_kernel_sve.c @@ -0,0 +1,37 @@ +#include "common.h" + +#include + +#ifdef DOUBLE +#define SVE_TYPE svfloat64_t +#define SVE_ZERO svdup_f64(0.0) +#define SVE_WHILELT svwhilelt_b64 +#define SVE_ALL svptrue_b64() +#define SVE_WIDTH svcntd() +#else +#define SVE_TYPE svfloat32_t +#define SVE_ZERO svdup_f32(0.0) +#define SVE_WHILELT svwhilelt_b32 +#define SVE_ALL svptrue_b32() +#define SVE_WIDTH svcntw() +#endif + +static int swap_kernel_sve(BLASLONG n, FLOAT *x,BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) { + BLASLONG sve_width = SVE_WIDTH; + + for (BLASLONG i = 0; i < n; i += sve_width * 2) { + svbool_t pg_a = SVE_WHILELT(i, n); + svbool_t pg_b = SVE_WHILELT((i + sve_width), n); + SVE_TYPE x_vec_a = svld1(pg_a, &x[i]); + SVE_TYPE y_vec_a = svld1(pg_a, &y[i]); + SVE_TYPE x_vec_b = svld1(pg_b, &x[i + sve_width]); + SVE_TYPE y_vec_b = svld1(pg_b, &y[i + sve_width]); + + svst1(pg_a, &x[i], y_vec_a); + svst1(pg_a, &y[i], x_vec_a); + svst1(pg_b, &x[i+sve_width], y_vec_b); + svst1(pg_b, &y[i+sve_width], x_vec_b); + } + return 0; +} +