Merge pull request #4452 from kseniyazaytseva/riscv-generic

Fix BLAS, BLAS-like functions and Generic RISC-V kernels
This commit is contained in:
Martin Kroeker 2024-01-24 17:52:25 +01:00 committed by GitHub
commit a21b2fa5e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 206 additions and 124 deletions

View File

@ -289,6 +289,14 @@ void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
/*
 * GEMMT family (single, double, complex, double-complex).
 *
 * Argument order for all four variants:
 *   Order, Uplo, TransA, TransB, M, K, alpha, A, lda, B, ldb, beta, C, ldc
 *
 * Unlike the cblas_?gemm prototypes there is no N argument: only M and K
 * are passed, and a CBLAS_UPLO selector is added after Order.
 * NOTE(review): presumably C is M-by-M and only the triangle named by Uplo
 * is updated - confirm against the gemmt implementation.
 *
 * The real variants (s/d) take alpha and beta by value; the complex
 * variants (c/z) take alpha, beta and the matrices through void pointers.
 */
void cblas_sgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_dgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_cgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
void cblas_zgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);

View File

@ -498,6 +498,15 @@ void BLASFUNC(zgemm3m)(char *, char *, blasint *, blasint *, blasint *, double *
void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *, void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *,
xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *);
/*
 * Fortran-style GEMMT entry points; every argument is passed by pointer.
 *
 * Each prototype has three char* arguments, two blasint* arguments, and
 * then the scalar/matrix/leading-dimension arguments.
 * NOTE(review): positional meaning is presumably
 *   UPLO, TRANSA, TRANSB, M, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC
 * - confirm against the NAME(...) definition in the gemmt interface source.
 *
 * The complex variants (cgemmt/zgemmt) are declared with float* / double*
 * arguments, i.e. pointers to the real base type rather than a complex
 * struct type.
 */
void BLASFUNC(sgemmt)(char*, char *, char *, blasint *, blasint *, float *,
float *, blasint *, float *, blasint *, float *, float *, blasint *);
void BLASFUNC(dgemmt)(char*, char *, char *, blasint *, blasint *, double *,
double *, blasint *, double *, blasint *, double *, double *, blasint *);
void BLASFUNC(cgemmt)(char*, char *, char *, blasint *, blasint *, float *,
float *, blasint *, float *, blasint *, float *, float *, blasint *);
void BLASFUNC(zgemmt)(char*, char *, char *, blasint *, blasint *, double *,
double *, blasint *, double *, blasint *, double *, double *, blasint *);
int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *, int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *,
float *, float *, blasint *, float *, blasint *, float *, float *, blasint *, float *, blasint *,
float *, float *, blasint *); float *, float *, blasint *);

View File

@ -96,7 +96,7 @@
INTEGER ICAMAXTEST INTEGER ICAMAXTEST
EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST
* .. External Subroutines .. * .. External Subroutines ..
EXTERNAL CSCAL, CSSCALTEST, CTEST, ITEST1, STEST1 EXTERNAL CSCALTEST, CSSCALTEST, CTEST, ITEST1, STEST1
* .. Intrinsic Functions .. * .. Intrinsic Functions ..
INTRINSIC MAX INTRINSIC MAX
* .. Common blocks .. * .. Common blocks ..
@ -214,8 +214,8 @@
CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1), CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1),
+ STRUE4(NP1),SFAC) + STRUE4(NP1),SFAC)
ELSE IF (ICASE.EQ.8) THEN ELSE IF (ICASE.EQ.8) THEN
* .. CSCAL .. * .. CSCALTEST ..
CALL CSCAL(N,CA,CX,INCX) CALL CSCALTEST(N,CA,CX,INCX)
CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX), CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX),
+ SFAC) + SFAC)
ELSE IF (ICASE.EQ.9) THEN ELSE IF (ICASE.EQ.9) THEN
@ -236,14 +236,14 @@
* *
INCX = 1 INCX = 1
IF (ICASE.EQ.8) THEN IF (ICASE.EQ.8) THEN
* CSCAL * CSCALTEST
* Add a test for alpha equal to zero. * Add a test for alpha equal to zero.
CA = (0.0E0,0.0E0) CA = (0.0E0,0.0E0)
DO 80 I = 1, 5 DO 80 I = 1, 5
MWPCT(I) = (0.0E0,0.0E0) MWPCT(I) = (0.0E0,0.0E0)
MWPCS(I) = (1.0E0,1.0E0) MWPCS(I) = (1.0E0,1.0E0)
80 CONTINUE 80 CONTINUE
CALL CSCAL(5,CA,CX,INCX) CALL CSCALTEST(5,CA,CX,INCX)
CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC)
ELSE IF (ICASE.EQ.9) THEN ELSE IF (ICASE.EQ.9) THEN
* CSSCALTEST * CSSCALTEST

View File

@ -685,7 +685,7 @@ real *sfac;
static integer i__; static integer i__;
extern /* Subroutine */ int ctest_(); extern /* Subroutine */ int ctest_();
static complex mwpcs[5], mwpct[5]; static complex mwpcs[5], mwpct[5];
extern /* Subroutine */ int itest1_(), stest1_(); extern /* Subroutine */ int cscaltest_(), itest1_(), stest1_();
static complex cx[8]; static complex cx[8];
extern real scnrm2test_(); extern real scnrm2test_();
static integer np1; static integer np1;
@ -727,7 +727,7 @@ real *sfac;
stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac); stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac);
} else if (combla_1.icase == 8) { } else if (combla_1.icase == 8) {
/* .. CSCAL .. */ /* .. CSCAL .. */
cscal_(&combla_1.n, &ca, cx, &combla_1.incx); cscaltest_(&combla_1.n, &ca, cx, &combla_1.incx);
ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48],
&ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac); &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac);
} else if (combla_1.icase == 9) { } else if (combla_1.icase == 9) {
@ -761,7 +761,7 @@ real *sfac;
mwpcs[i__1].r = (float)1., mwpcs[i__1].i = (float)1.; mwpcs[i__1].r = (float)1., mwpcs[i__1].i = (float)1.;
/* L80: */ /* L80: */
} }
cscal_(&c__5, &ca, cx, &combla_1.incx); cscaltest_(&c__5, &ca, cx, &combla_1.incx);
ctest_(&c__5, cx, mwpct, mwpcs, sfac); ctest_(&c__5, cx, mwpct, mwpcs, sfac);
} else if (combla_1.icase == 9) { } else if (combla_1.icase == 9) {
/* CSSCALTEST */ /* CSSCALTEST */

View File

@ -35,29 +35,26 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "common.h" #include "common.h"
#ifdef FUNCTION_PROFILE
#include "functable.h"
#endif
#ifndef COMPLEX #ifndef COMPLEX
#define SMP_THRESHOLD_MIN 65536.0 #define SMP_THRESHOLD_MIN 65536.0
#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "QGEMT " #define ERROR_NAME "QGEMMT "
#elif defined(DOUBLE) #elif defined(DOUBLE)
#define ERROR_NAME "DGEMT " #define ERROR_NAME "DGEMMT "
#elif defined(BFLOAT16) #elif defined(BFLOAT16)
#define ERROR_NAME "SBGEMT " #define ERROR_NAME "SBGEMMT "
#else #else
#define ERROR_NAME "SGEMT " #define ERROR_NAME "SGEMMT "
#endif #endif
#else #else
#define SMP_THRESHOLD_MIN 8192.0 #define SMP_THRESHOLD_MIN 8192.0
#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "XGEMT " #define ERROR_NAME "XGEMMT "
#elif defined(DOUBLE) #elif defined(DOUBLE)
#define ERROR_NAME "ZGEMT " #define ERROR_NAME "ZGEMMT "
#else #else
#define ERROR_NAME "CGEMT " #define ERROR_NAME "CGEMMT "
#endif #endif
#endif #endif
@ -68,18 +65,22 @@
#ifndef CBLAS #ifndef CBLAS
void NAME(char *UPLO, char *TRANSA, char *TRANSB, void NAME(char *UPLO, char *TRANSA, char *TRANSB,
blasint * M, blasint * N, blasint * K, blasint * M, blasint * K,
FLOAT * Alpha, FLOAT * Alpha,
IFLOAT * a, blasint * ldA, IFLOAT * a, blasint * ldA,
IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC) IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC)
{ {
blasint m, n, k; blasint m, k;
blasint lda, ldb, ldc; blasint lda, ldb, ldc;
int transa, transb, uplo; int transa, transb, uplo;
blasint info; blasint info;
char transA, transB, Uplo; char transA, transB, Uplo;
blasint nrowa, nrowb;
#if defined(COMPLEX)
blasint ncolb;
#endif
IFLOAT *buffer; IFLOAT *buffer;
IFLOAT *aa, *bb; IFLOAT *aa, *bb;
FLOAT *cc; FLOAT *cc;
@ -92,7 +93,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB,
PRINT_DEBUG_NAME; PRINT_DEBUG_NAME;
m = *M; m = *M;
n = *N;
k = *K; k = *K;
#if defined(COMPLEX) #if defined(COMPLEX)
@ -159,32 +159,47 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB,
if (Uplo == 'L') if (Uplo == 'L')
uplo = 1; uplo = 1;
nrowa = m;
if (transa & 1) nrowa = k;
nrowb = k;
#if defined(COMPLEX)
ncolb = m;
#endif
if (transb & 1) {
nrowb = m;
#if defined(COMPLEX)
ncolb = k;
#endif
}
info = 0; info = 0;
if (uplo < 0) if (ldc < MAX(1, m))
info = 14;
if (ldc < m)
info = 13; info = 13;
if (ldb < MAX(1, nrowb))
info = 10;
if (lda < MAX(1, nrowa))
info = 8;
if (k < 0) if (k < 0)
info = 5; info = 5;
if (n < 0)
info = 4;
if (m < 0) if (m < 0)
info = 3; info = 4;
if (transb < 0) if (transb < 0)
info = 2; info = 3;
if (transa < 0) if (transa < 0)
info = 2;
if (uplo < 0)
info = 1; info = 1;
if (info) { if (info != 0) {
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME));
return; return;
} }
#else #else
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint m,
blasint N, blasint k, blasint k,
#ifndef COMPLEX #ifndef COMPLEX
FLOAT alpha, FLOAT alpha,
IFLOAT * A, blasint LDA, IFLOAT * A, blasint LDA,
@ -205,17 +220,23 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
int transa, transb, uplo; int transa, transb, uplo;
blasint info; blasint info;
blasint m, n, lda, ldb; blasint lda, ldb;
FLOAT *a, *b; FLOAT *a, *b;
#if defined(COMPLEX)
blasint nrowb, ncolb;
#endif
XFLOAT *buffer; XFLOAT *buffer;
PRINT_DEBUG_CNAME; PRINT_DEBUG_CNAME;
uplo = -1;
transa = -1; transa = -1;
transb = -1; transb = -1;
info = 0; info = 0;
if (order == CblasColMajor) { if (order == CblasColMajor) {
if (Uplo == CblasUpper) uplo = 0;
if (Uplo == CblasLower) uplo = 1;
if (TransA == CblasNoTrans) if (TransA == CblasNoTrans)
transa = 0; transa = 0;
@ -248,9 +269,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
transb = 3; transb = 3;
#endif #endif
m = M;
n = N;
a = (void *)A; a = (void *)A;
b = (void *)B; b = (void *)B;
lda = LDA; lda = LDA;
@ -258,23 +276,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
info = -1; info = -1;
if (ldc < m) blasint nrowa;
#if !defined(COMPLEX)
blasint nrowb;
#endif
nrowa = m;
if (transa & 1) nrowa = k;
nrowb = k;
#if defined(COMPLEX)
ncolb = m;
#endif
if (transb & 1) {
nrowb = m;
#if defined(COMPLEX)
ncolb = k;
#endif
}
if (ldc < MAX(1, m))
info = 13; info = 13;
if (ldb < MAX(1, nrowb))
info = 10;
if (lda < MAX(1, nrowa))
info = 8;
if (k < 0) if (k < 0)
info = 5; info = 5;
if (n < 0)
info = 4;
if (m < 0) if (m < 0)
info = 3; info = 4;
if (transb < 0) if (transb < 0)
info = 2; info = 3;
if (transa < 0) if (transa < 0)
info = 2;
if (uplo < 0)
info = 1; info = 1;
} }
if (order == CblasRowMajor) { if (order == CblasRowMajor) {
m = N;
n = M;
a = (void *)B; a = (void *)B;
b = (void *)A; b = (void *)A;
@ -282,6 +319,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
lda = LDB; lda = LDB;
ldb = LDA; ldb = LDA;
if (Uplo == CblasUpper) uplo = 0;
if (Uplo == CblasLower) uplo = 1;
if (TransB == CblasNoTrans) if (TransB == CblasNoTrans)
transa = 0; transa = 0;
if (TransB == CblasTrans) if (TransB == CblasTrans)
@ -315,29 +355,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
info = -1; info = -1;
if (ldc < m) blasint ncola;
#if !defined(COMPLEX)
blasint ncolb;
#endif
ncola = m;
if (transa & 1) ncola = k;
ncolb = k;
#if defined(COMPLEX)
nrowb = m;
#endif
if (transb & 1) {
#if defined(COMPLEX)
nrowb = k;
#endif
ncolb = m;
}
if (ldc < MAX(1,m))
info = 13; info = 13;
if (ldb < MAX(1, ncolb))
info = 8;
if (lda < MAX(1, ncola))
info = 10;
if (k < 0) if (k < 0)
info = 5; info = 5;
if (n < 0)
info = 4;
if (m < 0) if (m < 0)
info = 3; info = 4;
if (transb < 0) if (transb < 0)
info = 2; info = 2;
if (transa < 0) if (transa < 0)
info = 3;
if (uplo < 0)
info = 1; info = 1;
} }
uplo = -1;
if (Uplo == CblasUpper)
uplo = 0;
if (Uplo == CblasLower)
uplo = 1;
if (uplo < 0)
info = 14;
if (info >= 0) { if (info >= 0) {
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME));
return; return;
@ -407,37 +460,48 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
#endif #endif
if ((m == 0) || (n == 0)) if (m == 0)
return; return;
IDEBUG_START; IDEBUG_START;
FUNCTION_PROFILE_START(); #if defined(COMPLEX)
if (transb > 1){
#ifndef CBLAS
IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb);
#else
if (order == CblasColMajor)
IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb);
if (order == CblasRowMajor)
IMATCOPY_K_RNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb);
#endif
}
#endif
const blasint incb = (transb == 0) ? 1 : ldb; const blasint incb = ((transb & 1) == 0) ? 1 : ldb;
if (uplo == 1) { if (uplo == 1) {
for (i = 0; i < n; i++) { for (i = 0; i < m; i++) {
j = n - i; j = m - i;
l = j; l = j;
#if defined(COMPLEX) #if defined(COMPLEX)
aa = a + i * 2; aa = a + i * 2;
bb = b + i * ldb * 2; bb = b + i * ldb * 2;
if (transa) { if (transa & 1) {
l = k;
aa = a + lda * i * 2; aa = a + lda * i * 2;
bb = b + i * 2;
} }
if (transb & 1)
bb = b + i * 2;
cc = c + i * 2 * ldc + i * 2; cc = c + i * 2 * ldc + i * 2;
#else #else
aa = a + i; aa = a + i;
bb = b + i * ldb; bb = b + i * ldb;
if (transa) { if (transa & 1) {
l = k;
aa = a + lda * i; aa = a + lda * i;
bb = b + i;
} }
if (transb & 1)
bb = b + i;
cc = c + i * ldc + i; cc = c + i * ldc + i;
#endif #endif
@ -447,7 +511,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
NULL, 0); NULL, 0);
if (alpha_r == ZERO && alpha_i == ZERO) if (alpha_r == ZERO && alpha_i == ZERO)
return; continue;
#else #else
if (beta != ONE) if (beta != ONE)
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0);
@ -458,8 +522,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
IDEBUG_START; IDEBUG_START;
FUNCTION_PROFILE_START();
buffer_size = j + k + 128 / sizeof(FLOAT); buffer_size = j + k + 128 / sizeof(FLOAT);
#ifdef WINDOWS_ABI #ifdef WINDOWS_ABI
buffer_size += 160 / sizeof(FLOAT); buffer_size += 160 / sizeof(FLOAT);
@ -479,20 +541,34 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
#endif #endif
#if defined(COMPLEX) #if defined(COMPLEX)
if (!(transa & 1))
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i,
aa, lda, bb, incb, cc, 1, aa, lda, bb, incb, cc, 1,
buffer); buffer);
else
(gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i,
aa, lda, bb, incb, cc, 1,
buffer);
#else #else
if (!(transa & 1))
(gemv[(int)transa]) (j, k, 0, alpha, aa, lda, (gemv[(int)transa]) (j, k, 0, alpha, aa, lda,
bb, incb, cc, 1, buffer); bb, incb, cc, 1, buffer);
else
(gemv[(int)transa]) (k, j, 0, alpha, aa, lda,
bb, incb, cc, 1, buffer);
#endif #endif
#ifdef SMP #ifdef SMP
} else { } else {
if (!(transa & 1))
(gemv_thread[(int)transa]) (j, k, alpha, aa, (gemv_thread[(int)transa]) (j, k, alpha, aa,
lda, bb, incb, cc, lda, bb, incb, cc,
1, buffer, 1, buffer,
nthreads); nthreads);
else
(gemv_thread[(int)transa]) (k, j, alpha, aa,
lda, bb, incb, cc,
1, buffer,
nthreads);
} }
#endif #endif
@ -501,21 +577,19 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
} }
} else { } else {
for (i = 0; i < n; i++) { for (i = 0; i < m; i++) {
j = i + 1; j = i + 1;
l = j; l = j;
#if defined COMPLEX #if defined COMPLEX
bb = b + i * ldb * 2; bb = b + i * ldb * 2;
if (transa) { if (transb & 1) {
l = k;
bb = b + i * 2; bb = b + i * 2;
} }
cc = c + i * 2 * ldc; cc = c + i * 2 * ldc;
#else #else
bb = b + i * ldb; bb = b + i * ldb;
if (transa) { if (transb & 1) {
l = k;
bb = b + i; bb = b + i;
} }
cc = c + i * ldc; cc = c + i * ldc;
@ -527,7 +601,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
NULL, 0); NULL, 0);
if (alpha_r == ZERO && alpha_i == ZERO) if (alpha_r == ZERO && alpha_i == ZERO)
return; continue;
#else #else
if (beta != ONE) if (beta != ONE)
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0);
@ -537,8 +611,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
#endif #endif
IDEBUG_START; IDEBUG_START;
FUNCTION_PROFILE_START();
buffer_size = j + k + 128 / sizeof(FLOAT); buffer_size = j + k + 128 / sizeof(FLOAT);
#ifdef WINDOWS_ABI #ifdef WINDOWS_ABI
buffer_size += 160 / sizeof(FLOAT); buffer_size += 160 / sizeof(FLOAT);
@ -558,32 +630,41 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
#endif #endif
#if defined(COMPLEX) #if defined(COMPLEX)
if (!(transa & 1))
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i,
a, lda, bb, incb, cc, 1, a, lda, bb, incb, cc, 1,
buffer); buffer);
else
(gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i,
a, lda, bb, incb, cc, 1,
buffer);
#else #else
if (!(transa & 1))
(gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb, (gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb,
incb, cc, 1, buffer); incb, cc, 1, buffer);
else
(gemv[(int)transa]) (k, j, 0, alpha, a, lda, bb,
incb, cc, 1, buffer);
#endif #endif
#ifdef SMP #ifdef SMP
} else { } else {
if (!(transa & 1))
(gemv_thread[(int)transa]) (j, k, alpha, a, lda, (gemv_thread[(int)transa]) (j, k, alpha, a, lda,
bb, incb, cc, 1, bb, incb, cc, 1,
buffer, nthreads); buffer, nthreads);
else
(gemv_thread[(int)transa]) (k, j, alpha, a, lda,
bb, incb, cc, 1,
buffer, nthreads);
} }
#endif #endif
STACK_FREE(buffer); STACK_FREE(buffer);
} }
} }
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE,
args.m * args.k + args.k * args.n +
args.m * args.n, 2 * args.m * args.n * args.k);
IDEBUG_END; IDEBUG_END;
return; return;
} }

View File

@ -149,10 +149,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
#endif #endif
if ( *lda > *ldb ) if ( *rows > *cols )
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT); msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT);
else else
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT); msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT);
b = malloc(msize); b = malloc(msize);
if ( b == NULL ) if ( b == NULL )

View File

@ -96,12 +96,6 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
else else
{ {
dp2 = *dd2 * dy1; dp2 = *dd2 * dy1;
if(dp2 == ZERO)
{
dflag = -TWO;
dparam[0] = dflag;
return;
}
dp1 = *dd1 * *dx1; dp1 = *dd1 * *dx1;
dq2 = dp2 * dy1; dq2 = dp2 * dy1;
dq1 = dp1 * *dx1; dq1 = dp1 * *dx1;
@ -113,24 +107,10 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
dh12 = dp2 / dp1; dh12 = dp2 / dp1;
du = ONE - dh12 * dh21; du = ONE - dh12 * dh21;
if(du > ZERO) dflag = ZERO;
{ *dd1 = *dd1 / du;
dflag = ZERO; *dd2 = *dd2 / du;
*dd1 = *dd1 / du; *dx1 = *dx1 * du;
*dd2 = *dd2 / du;
*dx1 = *dx1 * du;
} else {
dflag = -ONE;
dh11 = ZERO;
dh12 = ZERO;
dh21 = ZERO;
dh22 = ZERO;
*dd1 = ZERO;
*dd2 = ZERO;
*dx1 = ZERO;
}
} }
else else

View File

@ -171,10 +171,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
} }
#endif #endif
if ( *lda > *ldb ) if ( *rows > *cols )
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT) * 2; msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT) * 2;
else else
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT) * 2; msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT) * 2;
b = malloc(msize); b = malloc(msize);
if ( b == NULL ) if ( b == NULL )

View File

@ -40,7 +40,6 @@ int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a,
if ( rows <= 0 ) return(0); if ( rows <= 0 ) return(0);
if ( cols <= 0 ) return(0); if ( cols <= 0 ) return(0);
if ( alpha_r == 1.0 && alpha_i == 0.0 ) return (0);
aptr = a; aptr = a;
lda *= 2; lda *= 2;

View File

@ -45,6 +45,11 @@ DAXPYKERNEL = ../riscv64/axpy.c
CAXPYKERNEL = ../riscv64/zaxpy.c CAXPYKERNEL = ../riscv64/zaxpy.c
ZAXPYKERNEL = ../riscv64/zaxpy.c ZAXPYKERNEL = ../riscv64/zaxpy.c
SAXPBYKERNEL = ../riscv64/axpby.c
DAXPBYKERNEL = ../riscv64/axpby.c
CAXPBYKERNEL = ../riscv64/zaxpby.c
ZAXPBYKERNEL = ../riscv64/zaxpby.c
SCOPYKERNEL = ../riscv64/copy.c SCOPYKERNEL = ../riscv64/copy.c
DCOPYKERNEL = ../riscv64/copy.c DCOPYKERNEL = ../riscv64/copy.c
CCOPYKERNEL = ../riscv64/zcopy.c CCOPYKERNEL = ../riscv64/zcopy.c

View File

@ -33,7 +33,7 @@ int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix,iy; BLASLONG ix,iy;
if ( n < 0 ) return(0); if ( n <= 0 ) return(0);
ix = 0; ix = 0;
iy = 0; iy = 0;

View File

@ -42,7 +42,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix,iy; BLASLONG ix,iy;
if ( n < 0 ) return(0); if ( n <= 0 ) return(0);
if ( da == 0.0 ) return(0); if ( da == 0.0 ) return(0);
ix = 0; ix = 0;

View File

@ -41,7 +41,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
if ( n < 0 ) return(0); if ( n <= 0 ) return(0);
while(i < n) while(i < n)
{ {

View File

@ -46,7 +46,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
double dot = 0.0 ; double dot = 0.0 ;
if ( n < 0 ) return(dot); if ( n < 1 ) return(dot);
while(i < n) while(i < n)
{ {

View File

@ -41,7 +41,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x,
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
FLOAT temp; FLOAT temp;
if ( n < 0 ) return(0); if ( n <= 0 ) return(0);
while(i < n) while(i < n)
{ {

View File

@ -44,7 +44,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
BLASLONG inc_x2; BLASLONG inc_x2;
BLASLONG inc_y2; BLASLONG inc_y2;
if ( n < 0 ) return(0); if ( n <= 0 ) return(0);
if ( da_r == 0.0 && da_i == 0.0 ) return(0); if ( da_r == 0.0 && da_i == 0.0 ) return(0);
ix = 0; ix = 0;

View File

@ -43,7 +43,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
BLASLONG inc_x2; BLASLONG inc_x2;
BLASLONG inc_y2; BLASLONG inc_y2;
if ( n < 0 ) return(0); if ( n <= 0 ) return(0);
inc_x2 = 2 * inc_x; inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y; inc_y2 = 2 * inc_y;

View File

@ -45,7 +45,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm
BLASLONG inc_x2; BLASLONG inc_x2;
BLASLONG inc_y2; BLASLONG inc_y2;
if ( n < 0 ) return(0); if ( n <= 0 ) return(0);
inc_x2 = 2 * inc_x; inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y; inc_y2 = 2 * inc_y;