diff --git a/interface/ger.c b/interface/ger.c index 8cf1614e3..1c72d51ec 100644 --- a/interface/ger.c +++ b/interface/ger.c @@ -164,6 +164,11 @@ void CNAME(enum CBLAS_ORDER order, if (m == 0 || n == 0) return; if (alpha == 0.) return; + if (incx == 1 && incy == 1 && 1L*m*n <= 2048 *GEMM_MULTITHREAD_THRESHOLD) { + GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer); + return; + } + IDEBUG_START; FUNCTION_PROFILE_START(); diff --git a/interface/spr.c b/interface/spr.c index 1956986e9..8aafc9f85 100644 --- a/interface/spr.c +++ b/interface/spr.c @@ -167,6 +167,26 @@ void CNAME(enum CBLAS_ORDER order, FUNCTION_PROFILE_START(); + if (incx == 1 && n <100) { + blasint i; + if (uplo==0) { + for (i = 0; i < n; i++){ + if (x[i] != ZERO) { + AXPYU_K(i + 1, 0, 0, alpha * x[i], x, 1, a, 1, NULL, 0); + } + a += i + 1; + } + } else { + for (i = 0; i < n; i++){ + if (x[i] != ZERO) { + AXPYU_K(n - i, 0, 0, alpha * x[i], x + i, 1, a, 1, NULL, 0); + } + a += n - i; + } + } + return; + } + if (incx < 0 ) x -= (n - 1) * incx; buffer = (FLOAT *)blas_memory_alloc(1); diff --git a/interface/spr2.c b/interface/spr2.c index 73a811c3e..b5aab1767 100644 --- a/interface/spr2.c +++ b/interface/spr2.c @@ -168,6 +168,24 @@ void CNAME(enum CBLAS_ORDER order, if (alpha == ZERO) return; + if (incx == 1 && incy == 1 && n < 50) { + blasint i; + if (!uplo) { + for (i = 0; i < n; i++){ + AXPYU_K(i + 1, 0, 0, alpha * x[i], y, 1, a, 1, NULL, 0); + AXPYU_K(i + 1, 0, 0, alpha * y[i], x, 1, a, 1, NULL, 0); + a += i + 1; + } + } else { + for (i = 0; i < n; i++){ + AXPYU_K(n - i, 0, 0, alpha * x[i], y + i, 1, a, 1, NULL, 0); + AXPYU_K(n - i, 0, 0, alpha * y[i], x + i, 1, a, 1, NULL, 0); + a += n - i; + } + } + return; + } + IDEBUG_START; FUNCTION_PROFILE_START(); diff --git a/interface/syr2.c b/interface/syr2.c index 08fd47e57..632906d28 100644 --- a/interface/syr2.c +++ b/interface/syr2.c @@ -170,6 +170,25 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, IDEBUG_START; + if (incx == 1 && incy == 1 && n < 100) { + blasint i; + if (!uplo) { + for (i = 0; i < n; i++){ + AXPYU_K(i + 1, 0, 0, alpha * x[i], y, 1, a, 1, NULL, 0); + AXPYU_K(i + 1, 0, 0, alpha * y[i], x, 1, a, 1, NULL, 0); + a += lda; + } + } else { + for (i = 0; i < n; i++){ + AXPYU_K(n - i, 0, 0, alpha * x[i], y + i, 1, a, 1, NULL, 0); + AXPYU_K(n - i, 0, 0, alpha * y[i], x + i, 1, a, 1, NULL, 0); + a += 1 + lda; + } + } + return; + } + + FUNCTION_PROFILE_START(); if (incx < 0 ) x -= (n - 1) * incx; diff --git a/interface/trsv.c b/interface/trsv.c index a054d8eeb..6a6e8f8ba 100644 --- a/interface/trsv.c +++ b/interface/trsv.c @@ -188,6 +188,12 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, if (n == 0) return; + if (incx == 1 && trans == 0 && n < 50) { + buffer = NULL; + (trsv[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer); + return; + } + IDEBUG_START; FUNCTION_PROFILE_START(); diff --git a/interface/zsyr.c b/interface/zsyr.c index 09b1de578..71d4dbf29 100644 --- a/interface/zsyr.c +++ b/interface/zsyr.c @@ -172,6 +172,32 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO if ((alpha_r == ZERO) && (alpha_i == ZERO)) return; + if (incx == 1 && n < 50) { + blasint i; + if (!uplo) { + for (i = 0; i < n; i++){ + if ((x[i * 2 + 0] != ZERO) || (x[i * 2 + 1] != ZERO)) { + AXPYU_K(i + 1, 0, 0, + alpha_r * x[i * 2 + 0] - alpha_i * x[i * 2 + 1], + alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1], + x, 1, a, 1, NULL, 0); + } + a += lda; + } + } else { + for (i = 0; i < n; i++){ + if ((x[i * 2 + 0] != ZERO) || (x[i * 2 + 1] != ZERO)) { + AXPYU_K(n - i, 0, 0, + alpha_r * x[i * 2 + 0] - alpha_i * x[i * 2 + 1], + alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1], + x + i * 2, 1, a, 1, NULL, 0); + } + a += 2 + lda; + } + } + return; + } + IDEBUG_START; FUNCTION_PROFILE_START(); diff --git a/interface/ztrsv.c b/interface/ztrsv.c index cbb7bba13..cf750b0b0 100644 --- a/interface/ztrsv.c +++ b/interface/ztrsv.c @@ -199,6 +199,12 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, if (n == 0) return; + if (incx == 1 && trans == 0 && n < 50) { + buffer = NULL; + (trsv[(trans<<2) | (uplo<<1) | unit])(n, a, lda, x, incx, buffer); + return; + } + IDEBUG_START; FUNCTION_PROFILE_START();