Fix BLAS, BLAS-like functions and Generic RISC-V kernels
* Fixed gemmt, imatcopy, zimatcopy_cnc functions * Fixed cblas_cscal testing in ctest * Removed rotmg unreachable code * Added zero size checks
This commit is contained in:
parent
88e994116c
commit
ff41cf5c49
8
cblas.h
8
cblas.h
|
@ -289,6 +289,14 @@ void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA
|
|||
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_sgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_dgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_cgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
void cblas_zgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
|
||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
|
||||
|
||||
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
|
||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
|
||||
|
|
|
@ -498,6 +498,15 @@ void BLASFUNC(zgemm3m)(char *, char *, blasint *, blasint *, blasint *, double *
|
|||
void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *,
|
||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(sgemmt)(char*, char *, char *, blasint *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(dgemmt)(char*, char *, char *, blasint *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *, double *, blasint *);
|
||||
void BLASFUNC(cgemmt)(char*, char *, char *, blasint *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(zgemmt)(char*, char *, char *, blasint *, blasint *, double *,
|
||||
double *, blasint *, double *, blasint *, double *, double *, blasint *);
|
||||
|
||||
int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *,
|
||||
float *, float *, blasint *, float *, blasint *,
|
||||
float *, float *, blasint *);
|
||||
|
|
|
@ -96,7 +96,7 @@
|
|||
INTEGER ICAMAXTEST
|
||||
EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST
|
||||
* .. External Subroutines ..
|
||||
EXTERNAL CSCAL, CSSCALTEST, CTEST, ITEST1, STEST1
|
||||
EXTERNAL CSCALTEST, CSSCALTEST, CTEST, ITEST1, STEST1
|
||||
* .. Intrinsic Functions ..
|
||||
INTRINSIC MAX
|
||||
* .. Common blocks ..
|
||||
|
@ -214,8 +214,8 @@
|
|||
CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1),
|
||||
+ STRUE4(NP1),SFAC)
|
||||
ELSE IF (ICASE.EQ.8) THEN
|
||||
* .. CSCAL ..
|
||||
CALL CSCAL(N,CA,CX,INCX)
|
||||
* .. CSCALTEST ..
|
||||
CALL CSCALTEST(N,CA,CX,INCX)
|
||||
CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX),
|
||||
+ SFAC)
|
||||
ELSE IF (ICASE.EQ.9) THEN
|
||||
|
@ -236,14 +236,14 @@
|
|||
*
|
||||
INCX = 1
|
||||
IF (ICASE.EQ.8) THEN
|
||||
* CSCAL
|
||||
* CSCALTEST
|
||||
* Add a test for alpha equal to zero.
|
||||
CA = (0.0E0,0.0E0)
|
||||
DO 80 I = 1, 5
|
||||
MWPCT(I) = (0.0E0,0.0E0)
|
||||
MWPCS(I) = (1.0E0,1.0E0)
|
||||
80 CONTINUE
|
||||
CALL CSCAL(5,CA,CX,INCX)
|
||||
CALL CSCALTEST(5,CA,CX,INCX)
|
||||
CALL CTEST(5,CX,MWPCT,MWPCS,SFAC)
|
||||
ELSE IF (ICASE.EQ.9) THEN
|
||||
* CSSCALTEST
|
||||
|
|
|
@ -685,7 +685,7 @@ real *sfac;
|
|||
static integer i__;
|
||||
extern /* Subroutine */ int ctest_();
|
||||
static complex mwpcs[5], mwpct[5];
|
||||
extern /* Subroutine */ int itest1_(), stest1_();
|
||||
extern /* Subroutine */ int cscaltest_(), itest1_(), stest1_();
|
||||
static complex cx[8];
|
||||
extern real scnrm2test_();
|
||||
static integer np1;
|
||||
|
@ -727,7 +727,7 @@ real *sfac;
|
|||
stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac);
|
||||
} else if (combla_1.icase == 8) {
|
||||
/* .. CSCAL .. */
|
||||
cscal_(&combla_1.n, &ca, cx, &combla_1.incx);
|
||||
cscaltest_(&combla_1.n, &ca, cx, &combla_1.incx);
|
||||
ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48],
|
||||
&ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac);
|
||||
} else if (combla_1.icase == 9) {
|
||||
|
@ -761,7 +761,7 @@ real *sfac;
|
|||
mwpcs[i__1].r = (float)1., mwpcs[i__1].i = (float)1.;
|
||||
/* L80: */
|
||||
}
|
||||
cscal_(&c__5, &ca, cx, &combla_1.incx);
|
||||
cscaltest_(&c__5, &ca, cx, &combla_1.incx);
|
||||
ctest_(&c__5, cx, mwpct, mwpcs, sfac);
|
||||
} else if (combla_1.icase == 9) {
|
||||
/* CSSCALTEST */
|
||||
|
|
|
@ -35,29 +35,26 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "common.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#ifndef COMPLEX
|
||||
#define SMP_THRESHOLD_MIN 65536.0
|
||||
#ifdef XDOUBLE
|
||||
#define ERROR_NAME "QGEMT "
|
||||
#define ERROR_NAME "QGEMMT "
|
||||
#elif defined(DOUBLE)
|
||||
#define ERROR_NAME "DGEMT "
|
||||
#define ERROR_NAME "DGEMMT "
|
||||
#elif defined(BFLOAT16)
|
||||
#define ERROR_NAME "SBGEMT "
|
||||
#define ERROR_NAME "SBGEMMT "
|
||||
#else
|
||||
#define ERROR_NAME "SGEMT "
|
||||
#define ERROR_NAME "SGEMMT "
|
||||
#endif
|
||||
#else
|
||||
#define SMP_THRESHOLD_MIN 8192.0
|
||||
#ifdef XDOUBLE
|
||||
#define ERROR_NAME "XGEMT "
|
||||
#define ERROR_NAME "XGEMMT "
|
||||
#elif defined(DOUBLE)
|
||||
#define ERROR_NAME "ZGEMT "
|
||||
#define ERROR_NAME "ZGEMMT "
|
||||
#else
|
||||
#define ERROR_NAME "CGEMT "
|
||||
#define ERROR_NAME "CGEMMT "
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -68,18 +65,22 @@
|
|||
#ifndef CBLAS
|
||||
|
||||
void NAME(char *UPLO, char *TRANSA, char *TRANSB,
|
||||
blasint * M, blasint * N, blasint * K,
|
||||
blasint * M, blasint * K,
|
||||
FLOAT * Alpha,
|
||||
IFLOAT * a, blasint * ldA,
|
||||
IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC)
|
||||
{
|
||||
|
||||
blasint m, n, k;
|
||||
blasint m, k;
|
||||
blasint lda, ldb, ldc;
|
||||
int transa, transb, uplo;
|
||||
blasint info;
|
||||
|
||||
char transA, transB, Uplo;
|
||||
blasint nrowa, nrowb;
|
||||
#if defined(COMPLEX)
|
||||
blasint ncolb;
|
||||
#endif
|
||||
IFLOAT *buffer;
|
||||
IFLOAT *aa, *bb;
|
||||
FLOAT *cc;
|
||||
|
@ -92,7 +93,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB,
|
|||
PRINT_DEBUG_NAME;
|
||||
|
||||
m = *M;
|
||||
n = *N;
|
||||
k = *K;
|
||||
|
||||
#if defined(COMPLEX)
|
||||
|
@ -159,32 +159,47 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB,
|
|||
if (Uplo == 'L')
|
||||
uplo = 1;
|
||||
|
||||
nrowa = m;
|
||||
if (transa & 1) nrowa = k;
|
||||
nrowb = k;
|
||||
#if defined(COMPLEX)
|
||||
ncolb = m;
|
||||
#endif
|
||||
if (transb & 1) {
|
||||
nrowb = m;
|
||||
#if defined(COMPLEX)
|
||||
ncolb = k;
|
||||
#endif
|
||||
}
|
||||
|
||||
info = 0;
|
||||
|
||||
if (uplo < 0)
|
||||
info = 14;
|
||||
if (ldc < m)
|
||||
if (ldc < MAX(1, m))
|
||||
info = 13;
|
||||
if (ldb < MAX(1, nrowb))
|
||||
info = 10;
|
||||
if (lda < MAX(1, nrowa))
|
||||
info = 8;
|
||||
if (k < 0)
|
||||
info = 5;
|
||||
if (n < 0)
|
||||
info = 4;
|
||||
if (m < 0)
|
||||
info = 3;
|
||||
info = 4;
|
||||
if (transb < 0)
|
||||
info = 2;
|
||||
info = 3;
|
||||
if (transa < 0)
|
||||
info = 2;
|
||||
if (uplo < 0)
|
||||
info = 1;
|
||||
|
||||
if (info) {
|
||||
if (info != 0) {
|
||||
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
return;
|
||||
}
|
||||
#else
|
||||
|
||||
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
||||
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M,
|
||||
blasint N, blasint k,
|
||||
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint m,
|
||||
blasint k,
|
||||
#ifndef COMPLEX
|
||||
FLOAT alpha,
|
||||
IFLOAT * A, blasint LDA,
|
||||
|
@ -205,17 +220,23 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
|||
|
||||
int transa, transb, uplo;
|
||||
blasint info;
|
||||
blasint m, n, lda, ldb;
|
||||
blasint lda, ldb;
|
||||
FLOAT *a, *b;
|
||||
#if defined(COMPLEX)
|
||||
blasint nrowb, ncolb;
|
||||
#endif
|
||||
XFLOAT *buffer;
|
||||
|
||||
PRINT_DEBUG_CNAME;
|
||||
|
||||
uplo = -1;
|
||||
transa = -1;
|
||||
transb = -1;
|
||||
info = 0;
|
||||
|
||||
if (order == CblasColMajor) {
|
||||
if (Uplo == CblasUpper) uplo = 0;
|
||||
if (Uplo == CblasLower) uplo = 1;
|
||||
|
||||
if (TransA == CblasNoTrans)
|
||||
transa = 0;
|
||||
|
@ -248,9 +269,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
|||
transb = 3;
|
||||
#endif
|
||||
|
||||
m = M;
|
||||
n = N;
|
||||
|
||||
a = (void *)A;
|
||||
b = (void *)B;
|
||||
lda = LDA;
|
||||
|
@ -258,23 +276,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
|||
|
||||
info = -1;
|
||||
|
||||
if (ldc < m)
|
||||
blasint nrowa;
|
||||
#if !defined(COMPLEX)
|
||||
blasint nrowb;
|
||||
#endif
|
||||
nrowa = m;
|
||||
if (transa & 1) nrowa = k;
|
||||
nrowb = k;
|
||||
#if defined(COMPLEX)
|
||||
ncolb = m;
|
||||
#endif
|
||||
if (transb & 1) {
|
||||
nrowb = m;
|
||||
#if defined(COMPLEX)
|
||||
ncolb = k;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (ldc < MAX(1, m))
|
||||
info = 13;
|
||||
if (ldb < MAX(1, nrowb))
|
||||
info = 10;
|
||||
if (lda < MAX(1, nrowa))
|
||||
info = 8;
|
||||
if (k < 0)
|
||||
info = 5;
|
||||
if (n < 0)
|
||||
info = 4;
|
||||
if (m < 0)
|
||||
info = 3;
|
||||
info = 4;
|
||||
if (transb < 0)
|
||||
info = 2;
|
||||
info = 3;
|
||||
if (transa < 0)
|
||||
info = 2;
|
||||
if (uplo < 0)
|
||||
info = 1;
|
||||
}
|
||||
|
||||
if (order == CblasRowMajor) {
|
||||
m = N;
|
||||
n = M;
|
||||
|
||||
a = (void *)B;
|
||||
b = (void *)A;
|
||||
|
@ -282,6 +319,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
|||
lda = LDB;
|
||||
ldb = LDA;
|
||||
|
||||
if (Uplo == CblasUpper) uplo = 0;
|
||||
if (Uplo == CblasLower) uplo = 1;
|
||||
|
||||
if (TransB == CblasNoTrans)
|
||||
transa = 0;
|
||||
if (TransB == CblasTrans)
|
||||
|
@ -315,28 +355,41 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
|||
|
||||
info = -1;
|
||||
|
||||
if (ldc < m)
|
||||
blasint ncola;
|
||||
#if !defined(COMPLEX)
|
||||
blasint ncolb;
|
||||
#endif
|
||||
ncola = m;
|
||||
if (transa & 1) ncola = k;
|
||||
ncolb = k;
|
||||
#if defined(COMPLEX)
|
||||
nrowb = m;
|
||||
#endif
|
||||
|
||||
if (transb & 1) {
|
||||
#if defined(COMPLEX)
|
||||
nrowb = k;
|
||||
#endif
|
||||
ncolb = m;
|
||||
}
|
||||
|
||||
if (ldc < MAX(1,m))
|
||||
info = 13;
|
||||
if (ldb < MAX(1, ncolb))
|
||||
info = 8;
|
||||
if (lda < MAX(1, ncola))
|
||||
info = 10;
|
||||
if (k < 0)
|
||||
info = 5;
|
||||
if (n < 0)
|
||||
info = 4;
|
||||
if (m < 0)
|
||||
info = 3;
|
||||
info = 4;
|
||||
if (transb < 0)
|
||||
info = 2;
|
||||
if (transa < 0)
|
||||
info = 1;
|
||||
|
||||
}
|
||||
|
||||
uplo = -1;
|
||||
if (Uplo == CblasUpper)
|
||||
uplo = 0;
|
||||
if (Uplo == CblasLower)
|
||||
uplo = 1;
|
||||
info = 3;
|
||||
if (uplo < 0)
|
||||
info = 14;
|
||||
info = 1;
|
||||
}
|
||||
|
||||
if (info >= 0) {
|
||||
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
|
@ -407,37 +460,48 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
|||
|
||||
#endif
|
||||
|
||||
if ((m == 0) || (n == 0))
|
||||
if (m == 0)
|
||||
return;
|
||||
|
||||
IDEBUG_START;
|
||||
|
||||
FUNCTION_PROFILE_START();
|
||||
#if defined(COMPLEX)
|
||||
if (transb > 1){
|
||||
#ifndef CBLAS
|
||||
IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb);
|
||||
#else
|
||||
if (order == CblasColMajor)
|
||||
IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb);
|
||||
if (order == CblasRowMajor)
|
||||
IMATCOPY_K_RNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
const blasint incb = (transb == 0) ? 1 : ldb;
|
||||
const blasint incb = ((transb & 1) == 0) ? 1 : ldb;
|
||||
|
||||
if (uplo == 1) {
|
||||
for (i = 0; i < n; i++) {
|
||||
j = n - i;
|
||||
for (i = 0; i < m; i++) {
|
||||
j = m - i;
|
||||
|
||||
l = j;
|
||||
#if defined(COMPLEX)
|
||||
aa = a + i * 2;
|
||||
bb = b + i * ldb * 2;
|
||||
if (transa) {
|
||||
l = k;
|
||||
if (transa & 1) {
|
||||
aa = a + lda * i * 2;
|
||||
bb = b + i * 2;
|
||||
}
|
||||
if (transb & 1)
|
||||
bb = b + i * 2;
|
||||
cc = c + i * 2 * ldc + i * 2;
|
||||
#else
|
||||
aa = a + i;
|
||||
bb = b + i * ldb;
|
||||
if (transa) {
|
||||
l = k;
|
||||
if (transa & 1) {
|
||||
aa = a + lda * i;
|
||||
bb = b + i;
|
||||
}
|
||||
if (transb & 1)
|
||||
bb = b + i;
|
||||
cc = c + i * ldc + i;
|
||||
#endif
|
||||
|
||||
|
@ -447,7 +511,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
|||
NULL, 0);
|
||||
|
||||
if (alpha_r == ZERO && alpha_i == ZERO)
|
||||
return;
|
||||
continue;
|
||||
#else
|
||||
if (beta != ONE)
|
||||
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0);
|
||||
|
@ -458,8 +522,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
|||
|
||||
IDEBUG_START;
|
||||
|
||||
FUNCTION_PROFILE_START();
|
||||
|
||||
buffer_size = j + k + 128 / sizeof(FLOAT);
|
||||
#ifdef WINDOWS_ABI
|
||||
buffer_size += 160 / sizeof(FLOAT);
|
||||
|
@ -479,20 +541,34 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
|||
#endif
|
||||
|
||||
#if defined(COMPLEX)
|
||||
if (!(transa & 1))
|
||||
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i,
|
||||
aa, lda, bb, incb, cc, 1,
|
||||
buffer);
|
||||
else
|
||||
(gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i,
|
||||
aa, lda, bb, incb, cc, 1,
|
||||
buffer);
|
||||
#else
|
||||
if (!(transa & 1))
|
||||
(gemv[(int)transa]) (j, k, 0, alpha, aa, lda,
|
||||
bb, incb, cc, 1, buffer);
|
||||
else
|
||||
(gemv[(int)transa]) (k, j, 0, alpha, aa, lda,
|
||||
bb, incb, cc, 1, buffer);
|
||||
#endif
|
||||
#ifdef SMP
|
||||
} else {
|
||||
|
||||
if (!(transa & 1))
|
||||
(gemv_thread[(int)transa]) (j, k, alpha, aa,
|
||||
lda, bb, incb, cc,
|
||||
1, buffer,
|
||||
nthreads);
|
||||
else
|
||||
(gemv_thread[(int)transa]) (k, j, alpha, aa,
|
||||
lda, bb, incb, cc,
|
||||
1, buffer,
|
||||
nthreads);
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -501,21 +577,19 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
|||
}
|
||||
} else {
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
for (i = 0; i < m; i++) {
|
||||
j = i + 1;
|
||||
|
||||
l = j;
|
||||
#if defined COMPLEX
|
||||
bb = b + i * ldb * 2;
|
||||
if (transa) {
|
||||
l = k;
|
||||
if (transb & 1) {
|
||||
bb = b + i * 2;
|
||||
}
|
||||
cc = c + i * 2 * ldc;
|
||||
#else
|
||||
bb = b + i * ldb;
|
||||
if (transa) {
|
||||
l = k;
|
||||
if (transb & 1) {
|
||||
bb = b + i;
|
||||
}
|
||||
cc = c + i * ldc;
|
||||
|
@ -527,7 +601,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
|||
NULL, 0);
|
||||
|
||||
if (alpha_r == ZERO && alpha_i == ZERO)
|
||||
return;
|
||||
continue;
|
||||
#else
|
||||
if (beta != ONE)
|
||||
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0);
|
||||
|
@ -537,8 +611,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
|||
#endif
|
||||
IDEBUG_START;
|
||||
|
||||
FUNCTION_PROFILE_START();
|
||||
|
||||
buffer_size = j + k + 128 / sizeof(FLOAT);
|
||||
#ifdef WINDOWS_ABI
|
||||
buffer_size += 160 / sizeof(FLOAT);
|
||||
|
@ -558,30 +630,39 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
|||
#endif
|
||||
|
||||
#if defined(COMPLEX)
|
||||
if (!(transa & 1))
|
||||
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i,
|
||||
a, lda, bb, incb, cc, 1,
|
||||
buffer);
|
||||
else
|
||||
(gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i,
|
||||
a, lda, bb, incb, cc, 1,
|
||||
buffer);
|
||||
#else
|
||||
if (!(transa & 1))
|
||||
(gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb,
|
||||
incb, cc, 1, buffer);
|
||||
else
|
||||
(gemv[(int)transa]) (k, j, 0, alpha, a, lda, bb,
|
||||
incb, cc, 1, buffer);
|
||||
#endif
|
||||
|
||||
#ifdef SMP
|
||||
} else {
|
||||
|
||||
if (!(transa & 1))
|
||||
(gemv_thread[(int)transa]) (j, k, alpha, a, lda,
|
||||
bb, incb, cc, 1,
|
||||
buffer, nthreads);
|
||||
|
||||
else
|
||||
(gemv_thread[(int)transa]) (k, j, alpha, a, lda,
|
||||
bb, incb, cc, 1,
|
||||
buffer, nthreads);
|
||||
}
|
||||
#endif
|
||||
|
||||
STACK_FREE(buffer);
|
||||
}
|
||||
}
|
||||
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE,
|
||||
args.m * args.k + args.k * args.n +
|
||||
args.m * args.n, 2 * args.m * args.n * args.k);
|
||||
|
||||
IDEBUG_END;
|
||||
|
||||
|
|
|
@ -149,10 +149,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
|||
|
||||
#endif
|
||||
|
||||
if ( *lda > *ldb )
|
||||
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT);
|
||||
if ( *rows > *cols )
|
||||
msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT);
|
||||
else
|
||||
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT);
|
||||
msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT);
|
||||
|
||||
b = malloc(msize);
|
||||
if ( b == NULL )
|
||||
|
|
|
@ -96,12 +96,6 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
|
|||
else
|
||||
{
|
||||
dp2 = *dd2 * dy1;
|
||||
if(dp2 == ZERO)
|
||||
{
|
||||
dflag = -TWO;
|
||||
dparam[0] = dflag;
|
||||
return;
|
||||
}
|
||||
dp1 = *dd1 * *dx1;
|
||||
dq2 = dp2 * dy1;
|
||||
dq1 = dp1 * *dx1;
|
||||
|
@ -113,24 +107,10 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
|
|||
dh12 = dp2 / dp1;
|
||||
|
||||
du = ONE - dh12 * dh21;
|
||||
if(du > ZERO)
|
||||
{
|
||||
dflag = ZERO;
|
||||
*dd1 = *dd1 / du;
|
||||
*dd2 = *dd2 / du;
|
||||
*dx1 = *dx1 * du;
|
||||
} else {
|
||||
dflag = -ONE;
|
||||
|
||||
dh11 = ZERO;
|
||||
dh12 = ZERO;
|
||||
dh21 = ZERO;
|
||||
dh22 = ZERO;
|
||||
|
||||
*dd1 = ZERO;
|
||||
*dd2 = ZERO;
|
||||
*dx1 = ZERO;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
|
|
|
@ -171,10 +171,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
|||
}
|
||||
#endif
|
||||
|
||||
if ( *lda > *ldb )
|
||||
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT) * 2;
|
||||
if ( *rows > *cols )
|
||||
msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT) * 2;
|
||||
else
|
||||
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT) * 2;
|
||||
msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT) * 2;
|
||||
|
||||
b = malloc(msize);
|
||||
if ( b == NULL )
|
||||
|
|
|
@ -40,7 +40,6 @@ int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a,
|
|||
|
||||
if ( rows <= 0 ) return(0);
|
||||
if ( cols <= 0 ) return(0);
|
||||
if ( alpha_r == 1.0 && alpha_i == 0.0 ) return (0);
|
||||
|
||||
aptr = a;
|
||||
lda *= 2;
|
||||
|
|
|
@ -33,7 +33,7 @@ int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *
|
|||
BLASLONG i=0;
|
||||
BLASLONG ix,iy;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
if ( n <= 0 ) return(0);
|
||||
|
||||
ix = 0;
|
||||
iy = 0;
|
||||
|
|
|
@ -42,7 +42,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
|
|||
BLASLONG i=0;
|
||||
BLASLONG ix,iy;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
if ( n <= 0 ) return(0);
|
||||
if ( da == 0.0 ) return(0);
|
||||
|
||||
ix = 0;
|
||||
|
|
|
@ -41,7 +41,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
BLASLONG i=0;
|
||||
BLASLONG ix=0,iy=0;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
if ( n <= 0 ) return(0);
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
|
|
|
@ -46,7 +46,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
BLASLONG ix=0,iy=0;
|
||||
double dot = 0.0 ;
|
||||
|
||||
if ( n < 0 ) return(dot);
|
||||
if ( n < 1 ) return(dot);
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
|
|
|
@ -41,7 +41,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x,
|
|||
BLASLONG ix=0,iy=0;
|
||||
FLOAT temp;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
if ( n <= 0 ) return(0);
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
|
|
|
@ -44,7 +44,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
|||
BLASLONG inc_x2;
|
||||
BLASLONG inc_y2;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
if ( n <= 0 ) return(0);
|
||||
if ( da_r == 0.0 && da_i == 0.0 ) return(0);
|
||||
|
||||
ix = 0;
|
||||
|
|
|
@ -43,7 +43,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|||
BLASLONG inc_x2;
|
||||
BLASLONG inc_y2;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
if ( n <= 0 ) return(0);
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
inc_y2 = 2 * inc_y;
|
||||
|
|
|
@ -45,7 +45,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm
|
|||
BLASLONG inc_x2;
|
||||
BLASLONG inc_y2;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
if ( n <= 0 ) return(0);
|
||||
|
||||
inc_x2 = 2 * inc_x;
|
||||
inc_y2 = 2 * inc_y;
|
||||
|
|
Loading…
Reference in New Issue