From ff41cf5c49bda6bf84950812e737b2e7bcddf139 Mon Sep 17 00:00:00 2001 From: kseniyazaytseva Date: Fri, 17 Mar 2023 14:28:26 +0300 Subject: [PATCH 1/8] Fix BLAS, BLAS-like functions and Generic RISC-V kernels * Fixed gemmt, imatcopy, zimatcopy_cnc functions * Fixed cblas_cscal testing in ctest * Removed rotmg unreachable code * Added zero size checks --- cblas.h | 8 ++ common_interface.h | 9 ++ ctest/c_cblat1.f | 10 +- ctest/c_cblat1c.c | 6 +- interface/gemmt.c | 233 ++++++++++++++++++++++----------- interface/imatcopy.c | 8 +- interface/rotmg.c | 28 +--- interface/zimatcopy.c | 6 +- kernel/generic/zimatcopy_cnc.c | 1 - kernel/riscv64/axpby.c | 2 +- kernel/riscv64/axpy.c | 2 +- kernel/riscv64/copy.c | 2 +- kernel/riscv64/dot.c | 2 +- kernel/riscv64/swap.c | 2 +- kernel/riscv64/zaxpy.c | 2 +- kernel/riscv64/zcopy.c | 2 +- kernel/riscv64/zswap.c | 2 +- 17 files changed, 201 insertions(+), 124 deletions(-) diff --git a/cblas.h b/cblas.h index c2bdd27fa..f7d36788d 100644 --- a/cblas.h +++ b/cblas.h @@ -289,6 +289,14 @@ void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); +void cblas_sgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, + OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); +void 
cblas_dgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, + OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); +void cblas_cgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); +void cblas_zgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, + OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); diff --git a/common_interface.h b/common_interface.h index 318827920..61a82c306 100644 --- a/common_interface.h +++ b/common_interface.h @@ -498,6 +498,15 @@ void BLASFUNC(zgemm3m)(char *, char *, blasint *, blasint *, blasint *, double * void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, 
xdouble *, xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); +void BLASFUNC(sgemmt)(char*, char *, char *, blasint *, blasint *, float *, + float *, blasint *, float *, blasint *, float *, float *, blasint *); +void BLASFUNC(dgemmt)(char*, char *, char *, blasint *, blasint *, double *, + double *, blasint *, double *, blasint *, double *, double *, blasint *); +void BLASFUNC(cgemmt)(char*, char *, char *, blasint *, blasint *, float *, + float *, blasint *, float *, blasint *, float *, float *, blasint *); +void BLASFUNC(zgemmt)(char*, char *, char *, blasint *, blasint *, double *, + double *, blasint *, double *, blasint *, double *, double *, blasint *); + int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *, float *, float *, blasint *); diff --git a/ctest/c_cblat1.f b/ctest/c_cblat1.f index 1a123d74d..cad7c7fa7 100644 --- a/ctest/c_cblat1.f +++ b/ctest/c_cblat1.f @@ -96,7 +96,7 @@ INTEGER ICAMAXTEST EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST * .. External Subroutines .. - EXTERNAL CSCAL, CSSCALTEST, CTEST, ITEST1, STEST1 + EXTERNAL CSCALTEST, CSSCALTEST, CTEST, ITEST1, STEST1 * .. Intrinsic Functions .. INTRINSIC MAX * .. Common blocks .. @@ -214,8 +214,8 @@ CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1), + STRUE4(NP1),SFAC) ELSE IF (ICASE.EQ.8) THEN -* .. CSCAL .. - CALL CSCAL(N,CA,CX,INCX) +* .. CSCALTEST .. + CALL CSCALTEST(N,CA,CX,INCX) CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX), + SFAC) ELSE IF (ICASE.EQ.9) THEN @@ -236,14 +236,14 @@ * INCX = 1 IF (ICASE.EQ.8) THEN -* CSCAL +* CSCALTEST * Add a test for alpha equal to zero. 
CA = (0.0E0,0.0E0) DO 80 I = 1, 5 MWPCT(I) = (0.0E0,0.0E0) MWPCS(I) = (1.0E0,1.0E0) 80 CONTINUE - CALL CSCAL(5,CA,CX,INCX) + CALL CSCALTEST(5,CA,CX,INCX) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) ELSE IF (ICASE.EQ.9) THEN * CSSCALTEST diff --git a/ctest/c_cblat1c.c b/ctest/c_cblat1c.c index 8c0dd140c..af29301af 100644 --- a/ctest/c_cblat1c.c +++ b/ctest/c_cblat1c.c @@ -685,7 +685,7 @@ real *sfac; static integer i__; extern /* Subroutine */ int ctest_(); static complex mwpcs[5], mwpct[5]; - extern /* Subroutine */ int itest1_(), stest1_(); + extern /* Subroutine */ int cscaltest_(), itest1_(), stest1_(); static complex cx[8]; extern real scnrm2test_(); static integer np1; @@ -727,7 +727,7 @@ real *sfac; stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac); } else if (combla_1.icase == 8) { /* .. CSCAL .. */ - cscal_(&combla_1.n, &ca, cx, &combla_1.incx); + cscaltest_(&combla_1.n, &ca, cx, &combla_1.incx); ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac); } else if (combla_1.icase == 9) { @@ -761,7 +761,7 @@ real *sfac; mwpcs[i__1].r = (float)1., mwpcs[i__1].i = (float)1.; /* L80: */ } - cscal_(&c__5, &ca, cx, &combla_1.incx); + cscaltest_(&c__5, &ca, cx, &combla_1.incx); ctest_(&c__5, cx, mwpct, mwpcs, sfac); } else if (combla_1.icase == 9) { /* CSSCALTEST */ diff --git a/interface/gemmt.c b/interface/gemmt.c index 3eed1dfe4..a4530721c 100644 --- a/interface/gemmt.c +++ b/interface/gemmt.c @@ -35,29 +35,26 @@ #include #include #include "common.h" -#ifdef FUNCTION_PROFILE -#include "functable.h" -#endif #ifndef COMPLEX #define SMP_THRESHOLD_MIN 65536.0 #ifdef XDOUBLE -#define ERROR_NAME "QGEMT " +#define ERROR_NAME "QGEMMT " #elif defined(DOUBLE) -#define ERROR_NAME "DGEMT " +#define ERROR_NAME "DGEMMT " #elif defined(BFLOAT16) -#define ERROR_NAME "SBGEMT " +#define ERROR_NAME "SBGEMMT " #else -#define ERROR_NAME "SGEMT " +#define ERROR_NAME "SGEMMT " #endif #else #define SMP_THRESHOLD_MIN 8192.0 #ifdef 
XDOUBLE -#define ERROR_NAME "XGEMT " +#define ERROR_NAME "XGEMMT " #elif defined(DOUBLE) -#define ERROR_NAME "ZGEMT " +#define ERROR_NAME "ZGEMMT " #else -#define ERROR_NAME "CGEMT " +#define ERROR_NAME "CGEMMT " #endif #endif @@ -68,18 +65,22 @@ #ifndef CBLAS void NAME(char *UPLO, char *TRANSA, char *TRANSB, - blasint * M, blasint * N, blasint * K, + blasint * M, blasint * K, FLOAT * Alpha, IFLOAT * a, blasint * ldA, IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC) { - blasint m, n, k; + blasint m, k; blasint lda, ldb, ldc; int transa, transb, uplo; blasint info; char transA, transB, Uplo; + blasint nrowa, nrowb; +#if defined(COMPLEX) + blasint ncolb; +#endif IFLOAT *buffer; IFLOAT *aa, *bb; FLOAT *cc; @@ -92,7 +93,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, PRINT_DEBUG_NAME; m = *M; - n = *N; k = *K; #if defined(COMPLEX) @@ -159,32 +159,47 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, if (Uplo == 'L') uplo = 1; + nrowa = m; + if (transa & 1) nrowa = k; + nrowb = k; +#if defined(COMPLEX) + ncolb = m; +#endif + if (transb & 1) { + nrowb = m; +#if defined(COMPLEX) + ncolb = k; +#endif + } + info = 0; - if (uplo < 0) - info = 14; - if (ldc < m) + if (ldc < MAX(1, m)) info = 13; + if (ldb < MAX(1, nrowb)) + info = 10; + if (lda < MAX(1, nrowa)) + info = 8; if (k < 0) info = 5; - if (n < 0) - info = 4; if (m < 0) - info = 3; + info = 4; if (transb < 0) - info = 2; + info = 3; if (transa < 0) + info = 2; + if (uplo < 0) info = 1; - if (info) { + if (info != 0) { BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); return; } #else void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, - enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, - blasint N, blasint k, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint m, + blasint k, #ifndef COMPLEX FLOAT alpha, IFLOAT * A, blasint LDA, @@ -205,17 +220,23 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int transa, transb, uplo; blasint 
info; - blasint m, n, lda, ldb; + blasint lda, ldb; FLOAT *a, *b; +#if defined(COMPLEX) + blasint nrowb, ncolb; +#endif XFLOAT *buffer; PRINT_DEBUG_CNAME; + uplo = -1; transa = -1; transb = -1; info = 0; if (order == CblasColMajor) { + if (Uplo == CblasUpper) uplo = 0; + if (Uplo == CblasLower) uplo = 1; if (TransA == CblasNoTrans) transa = 0; @@ -248,9 +269,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, transb = 3; #endif - m = M; - n = N; - a = (void *)A; b = (void *)B; lda = LDA; @@ -258,23 +276,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, info = -1; - if (ldc < m) + blasint nrowa; +#if !defined(COMPLEX) + blasint nrowb; +#endif + nrowa = m; + if (transa & 1) nrowa = k; + nrowb = k; +#if defined(COMPLEX) + ncolb = m; +#endif + if (transb & 1) { + nrowb = m; +#if defined(COMPLEX) + ncolb = k; +#endif + } + + if (ldc < MAX(1, m)) info = 13; + if (ldb < MAX(1, nrowb)) + info = 10; + if (lda < MAX(1, nrowa)) + info = 8; if (k < 0) info = 5; - if (n < 0) - info = 4; if (m < 0) - info = 3; + info = 4; if (transb < 0) - info = 2; + info = 3; if (transa < 0) + info = 2; + if (uplo < 0) info = 1; } if (order == CblasRowMajor) { - m = N; - n = M; a = (void *)B; b = (void *)A; @@ -282,6 +319,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, lda = LDB; ldb = LDA; + if (Uplo == CblasUpper) uplo = 0; + if (Uplo == CblasLower) uplo = 1; + if (TransB == CblasNoTrans) transa = 0; if (TransB == CblasTrans) @@ -315,29 +355,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, info = -1; - if (ldc < m) + blasint ncola; +#if !defined(COMPLEX) + blasint ncolb; +#endif + ncola = m; + if (transa & 1) ncola = k; + ncolb = k; +#if defined(COMPLEX) + nrowb = m; +#endif + + if (transb & 1) { +#if defined(COMPLEX) + nrowb = k; +#endif + ncolb = m; + } + + if (ldc < MAX(1,m)) info = 13; + if (ldb < MAX(1, ncolb)) + info = 8; + if (lda < MAX(1, ncola)) + info = 10; if (k < 0) info = 5; - if (n < 0) - info = 4; if (m < 0) - info = 3; + info 
= 4; if (transb < 0) info = 2; if (transa < 0) + info = 3; + if (uplo < 0) info = 1; - } - uplo = -1; - if (Uplo == CblasUpper) - uplo = 0; - if (Uplo == CblasLower) - uplo = 1; - if (uplo < 0) - info = 14; - if (info >= 0) { BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); return; @@ -407,37 +460,48 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, #endif - if ((m == 0) || (n == 0)) + if (m == 0) return; IDEBUG_START; - FUNCTION_PROFILE_START(); +#if defined(COMPLEX) + if (transb > 1){ +#ifndef CBLAS + IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); +#else + if (order == CblasColMajor) + IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); + if (order == CblasRowMajor) + IMATCOPY_K_RNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); +#endif + } +#endif - const blasint incb = (transb == 0) ? 1 : ldb; + const blasint incb = ((transb & 1) == 0) ? 1 : ldb; if (uplo == 1) { - for (i = 0; i < n; i++) { - j = n - i; + for (i = 0; i < m; i++) { + j = m - i; l = j; #if defined(COMPLEX) aa = a + i * 2; bb = b + i * ldb * 2; - if (transa) { - l = k; + if (transa & 1) { aa = a + lda * i * 2; - bb = b + i * 2; } + if (transb & 1) + bb = b + i * 2; cc = c + i * 2 * ldc + i * 2; #else aa = a + i; bb = b + i * ldb; - if (transa) { - l = k; + if (transa & 1) { aa = a + lda * i; - bb = b + i; } + if (transb & 1) + bb = b + i; cc = c + i * ldc + i; #endif @@ -447,7 +511,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, NULL, 0); if (alpha_r == ZERO && alpha_i == ZERO) - return; + continue; #else if (beta != ONE) SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); @@ -458,8 +522,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, IDEBUG_START; - FUNCTION_PROFILE_START(); - buffer_size = j + k + 128 / sizeof(FLOAT); #ifdef WINDOWS_ABI buffer_size += 160 / sizeof(FLOAT); @@ -479,20 +541,34 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, #endif #if defined(COMPLEX) + if (!(transa & 1)) (gemv[(int)transa]) 
(j, k, 0, alpha_r, alpha_i, aa, lda, bb, incb, cc, 1, buffer); + else + (gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, + aa, lda, bb, incb, cc, 1, + buffer); #else + if (!(transa & 1)) (gemv[(int)transa]) (j, k, 0, alpha, aa, lda, bb, incb, cc, 1, buffer); + else + (gemv[(int)transa]) (k, j, 0, alpha, aa, lda, + bb, incb, cc, 1, buffer); #endif #ifdef SMP } else { - + if (!(transa & 1)) (gemv_thread[(int)transa]) (j, k, alpha, aa, lda, bb, incb, cc, 1, buffer, nthreads); + else + (gemv_thread[(int)transa]) (k, j, alpha, aa, + lda, bb, incb, cc, + 1, buffer, + nthreads); } #endif @@ -501,21 +577,19 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, } } else { - for (i = 0; i < n; i++) { + for (i = 0; i < m; i++) { j = i + 1; l = j; #if defined COMPLEX bb = b + i * ldb * 2; - if (transa) { - l = k; + if (transb & 1) { bb = b + i * 2; } cc = c + i * 2 * ldc; #else bb = b + i * ldb; - if (transa) { - l = k; + if (transb & 1) { bb = b + i; } cc = c + i * ldc; @@ -527,7 +601,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, NULL, 0); if (alpha_r == ZERO && alpha_i == ZERO) - return; + continue; #else if (beta != ONE) SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); @@ -537,8 +611,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, #endif IDEBUG_START; - FUNCTION_PROFILE_START(); - buffer_size = j + k + 128 / sizeof(FLOAT); #ifdef WINDOWS_ABI buffer_size += 160 / sizeof(FLOAT); @@ -558,32 +630,41 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, #endif #if defined(COMPLEX) + if (!(transa & 1)) (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, a, lda, bb, incb, cc, 1, buffer); + else + (gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, + a, lda, bb, incb, cc, 1, + buffer); #else + if (!(transa & 1)) (gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb, incb, cc, 1, buffer); + else + (gemv[(int)transa]) (k, j, 0, alpha, a, lda, bb, + incb, cc, 1, buffer); #endif #ifdef SMP } else { - + if (!(transa & 1)) (gemv_thread[(int)transa]) (j, k, 
alpha, a, lda, bb, incb, cc, 1, buffer, nthreads); - + else + (gemv_thread[(int)transa]) (k, j, alpha, a, lda, + bb, incb, cc, 1, + buffer, nthreads); } #endif STACK_FREE(buffer); } } - FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, - args.m * args.k + args.k * args.n + - args.m * args.n, 2 * args.m * args.n * args.k); IDEBUG_END; return; -} +} \ No newline at end of file diff --git a/interface/imatcopy.c b/interface/imatcopy.c index 91975f7f4..109280fe6 100644 --- a/interface/imatcopy.c +++ b/interface/imatcopy.c @@ -149,10 +149,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, #endif - if ( *lda > *ldb ) - msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT); - else - msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT); + if ( *rows > *cols ) + msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT); + else + msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT); b = malloc(msize); if ( b == NULL ) diff --git a/interface/rotmg.c b/interface/rotmg.c index 3a5ca8f95..b8f627221 100644 --- a/interface/rotmg.c +++ b/interface/rotmg.c @@ -96,12 +96,6 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ else { dp2 = *dd2 * dy1; - if(dp2 == ZERO) - { - dflag = -TWO; - dparam[0] = dflag; - return; - } dp1 = *dd1 * *dx1; dq2 = dp2 * dy1; dq1 = dp1 * *dx1; @@ -113,24 +107,10 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ dh12 = dp2 / dp1; du = ONE - dh12 * dh21; - if(du > ZERO) - { - dflag = ZERO; - *dd1 = *dd1 / du; - *dd2 = *dd2 / du; - *dx1 = *dx1 * du; - } else { - dflag = -ONE; - - dh11 = ZERO; - dh12 = ZERO; - dh21 = ZERO; - dh22 = ZERO; - - *dd1 = ZERO; - *dd2 = ZERO; - *dx1 = ZERO; - } + dflag = ZERO; + *dd1 = *dd1 / du; + *dd2 = *dd2 / du; + *dx1 = *dx1 * du; } else diff --git a/interface/zimatcopy.c b/interface/zimatcopy.c index ecda5ef4e..7d73ba572 100644 --- a/interface/zimatcopy.c +++ b/interface/zimatcopy.c @@ -171,10 +171,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, 
blasint crows, } #endif - if ( *lda > *ldb ) - msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT) * 2; + if ( *rows > *cols ) + msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT) * 2; else - msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT) * 2; + msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT) * 2; b = malloc(msize); if ( b == NULL ) diff --git a/kernel/generic/zimatcopy_cnc.c b/kernel/generic/zimatcopy_cnc.c index 8e772bd8a..6426cffc0 100644 --- a/kernel/generic/zimatcopy_cnc.c +++ b/kernel/generic/zimatcopy_cnc.c @@ -40,7 +40,6 @@ int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, if ( rows <= 0 ) return(0); if ( cols <= 0 ) return(0); - if ( alpha_r == 1.0 && alpha_i == 0.0 ) return (0); aptr = a; lda *= 2; diff --git a/kernel/riscv64/axpby.c b/kernel/riscv64/axpby.c index 278747f75..04f9518d3 100644 --- a/kernel/riscv64/axpby.c +++ b/kernel/riscv64/axpby.c @@ -33,7 +33,7 @@ int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT * BLASLONG i=0; BLASLONG ix,iy; - if ( n < 0 ) return(0); + if ( n <= 0 ) return(0); ix = 0; iy = 0; diff --git a/kernel/riscv64/axpy.c b/kernel/riscv64/axpy.c index fb1094dd9..19d12ad3f 100644 --- a/kernel/riscv64/axpy.c +++ b/kernel/riscv64/axpy.c @@ -42,7 +42,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS BLASLONG i=0; BLASLONG ix,iy; - if ( n < 0 ) return(0); + if ( n <= 0 ) return(0); if ( da == 0.0 ) return(0); ix = 0; diff --git a/kernel/riscv64/copy.c b/kernel/riscv64/copy.c index 7b4f04f30..e79ca59af 100644 --- a/kernel/riscv64/copy.c +++ b/kernel/riscv64/copy.c @@ -41,7 +41,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) BLASLONG i=0; BLASLONG ix=0,iy=0; - if ( n < 0 ) return(0); + if ( n <= 0 ) return(0); while(i < n) { diff --git a/kernel/riscv64/dot.c b/kernel/riscv64/dot.c index 46a84ad18..bf55998ca 100644 --- a/kernel/riscv64/dot.c +++ b/kernel/riscv64/dot.c @@ -46,7 +46,7 @@ FLOAT CNAME(BLASLONG n, FLOAT 
*x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) BLASLONG ix=0,iy=0; double dot = 0.0 ; - if ( n < 0 ) return(dot); + if ( n < 1 ) return(dot); while(i < n) { diff --git a/kernel/riscv64/swap.c b/kernel/riscv64/swap.c index eac621fb2..33bbeeb6a 100644 --- a/kernel/riscv64/swap.c +++ b/kernel/riscv64/swap.c @@ -41,7 +41,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG ix=0,iy=0; FLOAT temp; - if ( n < 0 ) return(0); + if ( n <= 0 ) return(0); while(i < n) { diff --git a/kernel/riscv64/zaxpy.c b/kernel/riscv64/zaxpy.c index 1dcaeac27..18b6315cb 100644 --- a/kernel/riscv64/zaxpy.c +++ b/kernel/riscv64/zaxpy.c @@ -44,7 +44,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, BLASLONG inc_x2; BLASLONG inc_y2; - if ( n < 0 ) return(0); + if ( n <= 0 ) return(0); if ( da_r == 0.0 && da_i == 0.0 ) return(0); ix = 0; diff --git a/kernel/riscv64/zcopy.c b/kernel/riscv64/zcopy.c index 07fe584c5..b0f19efd5 100644 --- a/kernel/riscv64/zcopy.c +++ b/kernel/riscv64/zcopy.c @@ -43,7 +43,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) BLASLONG inc_x2; BLASLONG inc_y2; - if ( n < 0 ) return(0); + if ( n <= 0 ) return(0); inc_x2 = 2 * inc_x; inc_y2 = 2 * inc_y; diff --git a/kernel/riscv64/zswap.c b/kernel/riscv64/zswap.c index ae4760ae0..df1402b94 100644 --- a/kernel/riscv64/zswap.c +++ b/kernel/riscv64/zswap.c @@ -45,7 +45,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm BLASLONG inc_x2; BLASLONG inc_y2; - if ( n < 0 ) return(0); + if ( n <= 0 ) return(0); inc_x2 = 2 * inc_x; inc_y2 = 2 * inc_y; From 5b4df851d7581145f0aee4336f11127a3a7acc8a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 21 Mar 2023 08:29:05 +0100 Subject: [PATCH 2/8] fix stray blank on continuation line --- interface/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/interface/Makefile b/interface/Makefile index 6f320d8f7..a4d3f710a 
100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -92,8 +92,9 @@ CBLAS2OBJS = \ cgemv.$(SUFFIX) cgeru.$(SUFFIX) cgerc.$(SUFFIX) \ ctrsv.$(SUFFIX) ctrmv.$(SUFFIX) \ csyr2.$(SUFFIX) cgbmv.$(SUFFIX) \ - csbmv.$(SUFFIX) \ - cspr2.$(SUFFIX) \ + csbmv.$(SUFFIX) cspmv.$(SUFFIX) \ + cspr.$(SUFFIX) cspr2.$(SUFFIX) \ + csymv.$(SUFFIX) csyr.$(SUFFIX) \ ctbsv.$(SUFFIX) ctbmv.$(SUFFIX) \ ctpsv.$(SUFFIX) ctpmv.$(SUFFIX) \ chemv.$(SUFFIX) chbmv.$(SUFFIX) \ From 1c04df20bd9c845160b3eb2e51adaceb6f93cf8a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 20 Mar 2023 23:04:12 +0100 Subject: [PATCH 3/8] Re-enable overriding the LAPACK SYMV,SYR,SPMV and SPR implementations --- lapack-netlib/SRC/Makefile | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 49798b0c5..5f22789bd 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -572,22 +572,26 @@ ALL_AUX_OBJS = xerbla.o ../INSTALL/lsame.o SLAPACKOBJS = \ sgetrf.o sgetrs.o spotrf.o sgetf2.o \ spotf2.o slaswp.o sgesv.o slauu2.o \ - slauum.o strti2.o strtri.o strtrs.o + slauum.o strti2.o strtri.o strtrs.o \ + ssymv.o ssyr.o sspmv.o sspr.o DLAPACKOBJS = \ dgetrf.o dgetrs.o dpotrf.o dgetf2.o \ dpotf2.o dlaswp.o dgesv.o dlauu2.o \ - dlauum.o dtrti2.o dtrtri.o dtrtrs.o + dlauum.o dtrti2.o dtrtri.o dtrtrs.o \ + dsymv.o dsyr.o dspmv.o dspr.o CLAPACKOBJS = \ cgetrf.o cgetrs.o cpotrf.o cgetf2.o \ cpotf2.o claswp.o cgesv.o clauu2.o \ - clauum.o ctrti2.o ctrtri.o ctrtrs.o + clauum.o ctrti2.o ctrtri.o ctrtrs.o \ + csymv.o csyr.o cspmv.o cspr.o ZLAPACKOBJS = \ zgetrf.o zgetrs.o zpotrf.o zgetf2.o \ zpotf2.o zlaswp.o zgesv.o zlauu2.o \ - zlauum.o ztrti2.o ztrtri.o ztrtrs.o + zlauum.o ztrti2.o ztrtri.o ztrtrs.o \ + zsymv.o zsyr.o zspmv.o zspr.o ALLAUX = $(filter-out $(ALL_AUX_OBJS),$(ALLAUX_O)) SLASRC = $(filter-out $(SLAPACKOBJS),$(SLASRC_O)) From 5222b5fc18829265be7ffc77e77271a18f17c005 Mon Sep 17 00:00:00 2001 From: 
kseniyazaytseva Date: Thu, 12 Oct 2023 22:06:00 +0300 Subject: [PATCH 4/8] Added axpby kernels for GENERIC RISC-V target --- kernel/riscv64/KERNEL.RISCV64_GENERIC | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/riscv64/KERNEL.RISCV64_GENERIC b/kernel/riscv64/KERNEL.RISCV64_GENERIC index 61a8a2b91..15bcd2289 100644 --- a/kernel/riscv64/KERNEL.RISCV64_GENERIC +++ b/kernel/riscv64/KERNEL.RISCV64_GENERIC @@ -45,6 +45,11 @@ DAXPYKERNEL = ../riscv64/axpy.c CAXPYKERNEL = ../riscv64/zaxpy.c ZAXPYKERNEL = ../riscv64/zaxpy.c +SAXPBYKERNEL = ../riscv64/axpby.c +DAXPBYKERNEL = ../riscv64/axpby.c +CAXPBYKERNEL = ../riscv64/zaxpby.c +ZAXPBYKERNEL = ../riscv64/zaxpby.c + SCOPYKERNEL = ../riscv64/copy.c DCOPYKERNEL = ../riscv64/copy.c CCOPYKERNEL = ../riscv64/zcopy.c From f1291614536d7d1bec6508fda9b0c56dd7286bb3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 21 Mar 2023 07:43:03 +0100 Subject: [PATCH 5/8] restore C/Z SPMV, SPR, SYR,SYMV --- interface/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/interface/Makefile b/interface/Makefile index a4d3f710a..3db4b2b6d 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -122,8 +122,9 @@ ZBLAS2OBJS = \ zgemv.$(SUFFIX) zgeru.$(SUFFIX) zgerc.$(SUFFIX) \ ztrsv.$(SUFFIX) ztrmv.$(SUFFIX) \ zsyr2.$(SUFFIX) zgbmv.$(SUFFIX) \ - zsbmv.$(SUFFIX) \ - zspr2.$(SUFFIX) \ + zsbmv.$(SUFFIX) zspmv.$(SUFFIX) \ + zspr.$(SUFFIX) zspr2.$(SUFFIX) \ + zsymv.$(SUFFIX) zsyr.$(SUFFIX) \ ztbsv.$(SUFFIX) ztbmv.$(SUFFIX) \ ztpsv.$(SUFFIX) ztpmv.$(SUFFIX) \ zhemv.$(SUFFIX) zhbmv.$(SUFFIX) \ From 85548e66ca25228a73ec08c257d5d92108b94b62 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 28 Mar 2023 16:33:09 +0200 Subject: [PATCH 6/8] Fix build failures seen with the NO_LAPACK option - cspr/csymv/csyr belong on the LAPACK list --- interface/Makefile | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/interface/Makefile b/interface/Makefile index 3db4b2b6d..6f320d8f7 100644 
--- a/interface/Makefile +++ b/interface/Makefile @@ -92,9 +92,8 @@ CBLAS2OBJS = \ cgemv.$(SUFFIX) cgeru.$(SUFFIX) cgerc.$(SUFFIX) \ ctrsv.$(SUFFIX) ctrmv.$(SUFFIX) \ csyr2.$(SUFFIX) cgbmv.$(SUFFIX) \ - csbmv.$(SUFFIX) cspmv.$(SUFFIX) \ - cspr.$(SUFFIX) cspr2.$(SUFFIX) \ - csymv.$(SUFFIX) csyr.$(SUFFIX) \ + csbmv.$(SUFFIX) \ + cspr2.$(SUFFIX) \ ctbsv.$(SUFFIX) ctbmv.$(SUFFIX) \ ctpsv.$(SUFFIX) ctpmv.$(SUFFIX) \ chemv.$(SUFFIX) chbmv.$(SUFFIX) \ @@ -122,9 +121,8 @@ ZBLAS2OBJS = \ zgemv.$(SUFFIX) zgeru.$(SUFFIX) zgerc.$(SUFFIX) \ ztrsv.$(SUFFIX) ztrmv.$(SUFFIX) \ zsyr2.$(SUFFIX) zgbmv.$(SUFFIX) \ - zsbmv.$(SUFFIX) zspmv.$(SUFFIX) \ - zspr.$(SUFFIX) zspr2.$(SUFFIX) \ - zsymv.$(SUFFIX) zsyr.$(SUFFIX) \ + zsbmv.$(SUFFIX) \ + zspr2.$(SUFFIX) \ ztbsv.$(SUFFIX) ztbmv.$(SUFFIX) \ ztpsv.$(SUFFIX) ztpmv.$(SUFFIX) \ zhemv.$(SUFFIX) zhbmv.$(SUFFIX) \ From f7cf637d7aad0990625f41f83db74446a5908509 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 28 Mar 2023 18:31:04 +0200 Subject: [PATCH 7/8] redo lost edit --- interface/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/interface/Makefile b/interface/Makefile index 6f320d8f7..275b71a1c 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -445,7 +445,8 @@ QLAPACKOBJS = \ CLAPACKOBJS = \ cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ - clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) + clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) \ + cspr.$(SUFFIX) cspmv.$(SUFFIX) csymv.$(SUFFIX) csyr.$(SUFFIX) #ZLAPACKOBJS = \ # zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ @@ -456,8 +457,8 @@ CLAPACKOBJS = \ ZLAPACKOBJS = \ zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ - zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) ztrtrs.$(SUFFIX) - + 
zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) ztrtrs.$(SUFFIX) \ + zspr.$(SUFFIX) zspmv.$(SUFFIX) zsymv.$(SUFFIX) zsyr.$(SUFFIX) XLAPACKOBJS = \ xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \ From f89e0034a479016ab5d9e1681abf07dab7f8cf38 Mon Sep 17 00:00:00 2001 From: kseniyazaytseva Date: Wed, 20 Dec 2023 21:20:30 +0300 Subject: [PATCH 8/8] Fix LAPACK usage from BLAS --- interface/Makefile | 7 +++---- lapack-netlib/SRC/Makefile | 12 ++++-------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/interface/Makefile b/interface/Makefile index 275b71a1c..6f320d8f7 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -445,8 +445,7 @@ QLAPACKOBJS = \ CLAPACKOBJS = \ cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ - clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) \ - cspr.$(SUFFIX) cspmv.$(SUFFIX) csymv.$(SUFFIX) csyr.$(SUFFIX) + clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) #ZLAPACKOBJS = \ # zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ @@ -457,8 +456,8 @@ CLAPACKOBJS = \ ZLAPACKOBJS = \ zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ - zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) ztrtrs.$(SUFFIX) \ - zspr.$(SUFFIX) zspmv.$(SUFFIX) zsymv.$(SUFFIX) zsyr.$(SUFFIX) + zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) ztrtrs.$(SUFFIX) + XLAPACKOBJS = \ xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \ diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 5f22789bd..49798b0c5 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -572,26 +572,22 @@ ALL_AUX_OBJS = xerbla.o ../INSTALL/lsame.o SLAPACKOBJS = \ sgetrf.o sgetrs.o spotrf.o sgetf2.o \ spotf2.o slaswp.o sgesv.o slauu2.o \ - slauum.o strti2.o strtri.o strtrs.o 
\ - ssymv.o ssyr.o sspmv.o sspr.o + slauum.o strti2.o strtri.o strtrs.o DLAPACKOBJS = \ dgetrf.o dgetrs.o dpotrf.o dgetf2.o \ dpotf2.o dlaswp.o dgesv.o dlauu2.o \ - dlauum.o dtrti2.o dtrtri.o dtrtrs.o \ - dsymv.o dsyr.o dspmv.o dspr.o + dlauum.o dtrti2.o dtrtri.o dtrtrs.o CLAPACKOBJS = \ cgetrf.o cgetrs.o cpotrf.o cgetf2.o \ cpotf2.o claswp.o cgesv.o clauu2.o \ - clauum.o ctrti2.o ctrtri.o ctrtrs.o \ - csymv.o csyr.o cspmv.o cspr.o + clauum.o ctrti2.o ctrtri.o ctrtrs.o ZLAPACKOBJS = \ zgetrf.o zgetrs.o zpotrf.o zgetf2.o \ zpotf2.o zlaswp.o zgesv.o zlauu2.o \ - zlauum.o ztrti2.o ztrtri.o ztrtrs.o \ - zsymv.o zsyr.o zspmv.o zspr.o + zlauum.o ztrti2.o ztrtri.o ztrtrs.o ALLAUX = $(filter-out $(ALL_AUX_OBJS),$(ALLAUX_O)) SLASRC = $(filter-out $(SLAPACKOBJS),$(SLASRC_O))