From d6d7a6685dc5189b43519bb0d5a5fba52b4b0955 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 27 May 2021 22:39:18 +0200 Subject: [PATCH 1/4] Add shortcuts for (small) cases that do not need expensive buffer allocation --- interface/ger.c | 5 +++++ interface/spr.c | 20 ++++++++++++++++++++ interface/spr2.c | 18 ++++++++++++++++++ interface/symv.c | 4 ++++ interface/syr2.c | 19 +++++++++++++++++++ interface/zsyr.c | 26 ++++++++++++++++++++++++++ 6 files changed, 92 insertions(+) diff --git a/interface/ger.c b/interface/ger.c index 8cf1614e3..1c72d51ec 100644 --- a/interface/ger.c +++ b/interface/ger.c @@ -164,6 +164,11 @@ void CNAME(enum CBLAS_ORDER order, if (m == 0 || n == 0) return; if (alpha == 0.) return; + if (incx == 1 && incy == 1 && 1L*m*n <= 2048 *GEMM_MULTITHREAD_THRESHOLD) { + GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer); + return; + } + IDEBUG_START; FUNCTION_PROFILE_START(); diff --git a/interface/spr.c b/interface/spr.c index 1956986e9..8aafc9f85 100644 --- a/interface/spr.c +++ b/interface/spr.c @@ -167,6 +167,26 @@ void CNAME(enum CBLAS_ORDER order, FUNCTION_PROFILE_START(); + if (incx == 1 && n <100) { + blasint i; + if (uplo==0) { + for (i = 0; i < n; i++){ + if (x[i] != ZERO) { + AXPYU_K(i + 1, 0, 0, alpha * x[i], x, 1, a, 1, NULL, 0); + } + a += i + 1; + } + } else { + for (i = 0; i < n; i++){ + if (x[i] != ZERO) { + AXPYU_K(n - i, 0, 0, alpha * x[i], x + i, 1, a, 1, NULL, 0); + } + a += n - i; + } + } + return; + } + if (incx < 0 ) x -= (n - 1) * incx; buffer = (FLOAT *)blas_memory_alloc(1); diff --git a/interface/spr2.c b/interface/spr2.c index 73a811c3e..b5aab1767 100644 --- a/interface/spr2.c +++ b/interface/spr2.c @@ -168,6 +168,24 @@ void CNAME(enum CBLAS_ORDER order, if (alpha == ZERO) return; + if (incx == 1 && incy == 1 && n < 50) { + blasint i; + if (!uplo) { + for (i = 0; i < n; i++){ + AXPYU_K(i + 1, 0, 0, alpha * x[i], y, 1, a, 1, NULL, 0); + AXPYU_K(i + 1, 0, 0, alpha * y[i], x, 1, a, 1, NULL, 0); + a += i + 1; + } + } else { + for (i = 0; i < n; i++){ + AXPYU_K(n - i, 0, 0, alpha * x[i], y + i, 1, a, 1, NULL, 0); + AXPYU_K(n - i, 0, 0, alpha * y[i], x + i, 1, a, 1, NULL, 0); + a += n - i; + } + } + return; + } + IDEBUG_START; FUNCTION_PROFILE_START(); diff --git a/interface/symv.c b/interface/symv.c index 07bd20022..de2b91ee4 100644 --- a/interface/symv.c +++ b/interface/symv.c @@ -170,6 +170,10 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, if (alpha == ZERO) return; + if (incx == 1 && incy == 1 && n*n < 2304 *GEMM_MULTITHREAD_THRESHOLD) { + (symv[uplo])(n, n, alpha, a, lda, x, incx, y, incy, buffer); + return; + } IDEBUG_START; FUNCTION_PROFILE_START(); diff --git a/interface/syr2.c b/interface/syr2.c index 08fd47e57..632906d28 100644 --- a/interface/syr2.c +++ b/interface/syr2.c @@ -170,6 +170,25 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, IDEBUG_START; + if (incx == 1 && incy == 1 && n < 100) { + blasint i; + if (!uplo) { + for (i = 0; i < n; i++){ + AXPYU_K(i + 1, 0, 0, alpha * x[i], y, 1, a, 1, NULL, 0); + AXPYU_K(i + 1, 0, 0, alpha * y[i], x, 1, a, 1, NULL, 0); + a += lda; + } + } else { + for (i = 0; i < n; i++){ + AXPYU_K(n - i, 0, 0, alpha * x[i], y + i, 1, a, 1, NULL, 0); + AXPYU_K(n - i, 0, 0, alpha * y[i], x + i, 1, a, 1, NULL, 0); + a += 1 + lda; + } + } + return; + } + + FUNCTION_PROFILE_START(); if (incx < 0 ) x -= (n - 1) * incx; diff --git a/interface/zsyr.c b/interface/zsyr.c index 09b1de578..b68237c93 100644 --- a/interface/zsyr.c +++ b/interface/zsyr.c @@ -172,6 +172,32 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO if ((alpha_r == ZERO) && (alpha_i == ZERO)) return; + if (incx == 1 && incy == 1 && n < 50) { + blasint i; + if (!uplo) { + for (i = 0; i < n; i++){ + if ((x[i * 2 + 0] != ZERO) || (x[i * 2 + 1] != ZERO)) { + AXPYU_K(i + 1, 0, 0, + alpha_r * x[i * 2 + 0] - alpha_i * x[i * 2 + 1], + alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1], + x, 1, a, 1, NULL, 0); + } + a += lda; + } + } else { + for (i = 0; i < n; i++){ + if ((x[i * 2 + 0] != ZERO) || (x[i * 2 + 1] != ZERO)) { + AXPYU_K(m - i, 0, 0, + alpha_r * x[i * 2 + 0] - alpha_i * x[i * 2 + 1], + alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1], + x + i * 2, 1, a, 1, NULL, 0); + } + a += 2 + lda; + } + } + return; + } + IDEBUG_START; FUNCTION_PROFILE_START(); From 1217eb910d2da2e8ce47ef62fd3543c6345a3923 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 28 May 2021 09:38:48 +0200 Subject: [PATCH 2/4] Fix copy-paste errors in variables used --- interface/zsyr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/interface/zsyr.c b/interface/zsyr.c index b68237c93..71d4dbf29 100644 --- a/interface/zsyr.c +++ b/interface/zsyr.c @@ -172,7 +172,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO if ((alpha_r == ZERO) && (alpha_i == ZERO)) return; - if (incx == 1 && incy == 1 && n < 50) { + if (incx == 1 && n < 50) { blasint i; if (!uplo) { for (i = 0; i < n; i++){ @@ -187,7 +187,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO } else { for (i = 0; i < n; i++){ if ((x[i * 2 + 0] != ZERO) || (x[i * 2 + 1] != ZERO)) { - AXPYU_K(m - i, 0, 0, + AXPYU_K(n - i, 0, 0, alpha_r * x[i * 2 + 0] - alpha_i * x[i * 2 + 1], alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1], x + i * 2, 1, a, 1, NULL, 0); From 734bd265a8b1c80f8fc078ad93fad817bdc9c08e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 29 May 2021 15:40:03 +0200 Subject: [PATCH 3/4] revert symv changes for now --- interface/symv.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/interface/symv.c b/interface/symv.c index de2b91ee4..07bd20022 100644 --- a/interface/symv.c +++ b/interface/symv.c @@ -170,10 +170,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, if (alpha == ZERO) return; - if (incx == 1 && incy == 1 && n*n < 2304 *GEMM_MULTITHREAD_THRESHOLD) { - (symv[uplo])(n, n, alpha, a, lda, x, incx, y, incy, buffer); - return; - } IDEBUG_START; FUNCTION_PROFILE_START(); From f84197c1a731889495f282be1d7089deedc83081 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 29 May 2021 22:28:00 +0200 Subject: [PATCH 4/4] Add shortcuts for (small) cases that do not need expensive buffer allocation --- interface/trsv.c | 6 ++++++ interface/ztrsv.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/interface/trsv.c b/interface/trsv.c index a054d8eeb..6a6e8f8ba 100644 --- a/interface/trsv.c +++ b/interface/trsv.c @@ -188,6 +188,12 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, if (n == 0) return; + if (incx == 1 && trans == 0 && n < 50) { + buffer = NULL; + (trsv[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer); + return; + } + IDEBUG_START; FUNCTION_PROFILE_START(); diff --git a/interface/ztrsv.c b/interface/ztrsv.c index cbb7bba13..cf750b0b0 100644 --- a/interface/ztrsv.c +++ b/interface/ztrsv.c @@ -199,6 +199,12 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, if (n == 0) return; + if (incx == 1 && trans == 0 && n < 50) { + buffer = NULL; + (trsv[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer); + return; + } + IDEBUG_START; FUNCTION_PROFILE_START();