Bug fix and improvements for [z]imatcopy interface.
This commit is contained in:
parent
c74ee11376
commit
33ab415f68
|
@ -120,17 +120,20 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
||||||
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef NEW_IMATCOPY
|
#ifdef NEW_IMATCOPY
|
||||||
if ( *lda == *ldb && *rows == *cols) {
|
if ( *lda == *ldb ) {
|
||||||
if ( order == BlasColMajor )
|
if ( order == BlasColMajor )
|
||||||
{
|
{
|
||||||
if ( trans == BlasNoTrans )
|
if ( trans == BlasNoTrans )
|
||||||
{
|
{
|
||||||
IMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda );
|
IMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda );
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
else
|
else if ( *rows == *cols )
|
||||||
{
|
{
|
||||||
IMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda );
|
IMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda );
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -138,21 +141,18 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
||||||
if ( trans == BlasNoTrans )
|
if ( trans == BlasNoTrans )
|
||||||
{
|
{
|
||||||
IMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda );
|
IMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda );
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
IMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
else if ( *rows == *cols )
|
||||||
|
{
|
||||||
|
IMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if ( *lda > *ldb )
|
msize = (size_t)(*rows) * (*cols) * sizeof(FLOAT);
|
||||||
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT);
|
|
||||||
else
|
|
||||||
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT);
|
|
||||||
|
|
||||||
b = malloc(msize);
|
b = malloc(msize);
|
||||||
if ( b == NULL )
|
if ( b == NULL )
|
||||||
|
@ -165,26 +165,26 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
||||||
{
|
{
|
||||||
if ( trans == BlasNoTrans )
|
if ( trans == BlasNoTrans )
|
||||||
{
|
{
|
||||||
OMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda, b, *ldb );
|
OMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda, b, *rows );
|
||||||
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0 , b, *ldb, a, *ldb );
|
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0 , b, *rows, a, *ldb );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
OMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda, b, *ldb );
|
OMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda, b, *cols );
|
||||||
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, b, *ldb, a, *ldb );
|
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, b, *cols, a, *ldb );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if ( trans == BlasNoTrans )
|
if ( trans == BlasNoTrans )
|
||||||
{
|
{
|
||||||
OMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda, b, *ldb );
|
OMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda, b, *cols );
|
||||||
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, b, *ldb, a, *ldb );
|
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, b, *cols, a, *ldb );
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
OMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda, b, *ldb );
|
OMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda, b, *rows );
|
||||||
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, b, *ldb, a, *ldb );
|
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, b, *rows, a, *ldb );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -125,27 +125,33 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
||||||
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef NEW_IMATCOPY
|
#ifdef NEW_IMATCOPY
|
||||||
if (*lda == *ldb && *cols == *rows) {
|
if (*lda == *ldb ) {
|
||||||
if ( order == BlasColMajor )
|
if ( order == BlasColMajor )
|
||||||
{
|
{
|
||||||
|
|
||||||
if ( trans == BlasNoTrans )
|
if ( trans == BlasNoTrans )
|
||||||
{
|
{
|
||||||
IMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
IMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
if ( trans == BlasConj )
|
if ( trans == BlasConj )
|
||||||
{
|
{
|
||||||
IMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
IMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
if ( trans == BlasTrans )
|
if ( trans == BlasTrans && *rows == *cols )
|
||||||
{
|
{
|
||||||
IMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
IMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
if ( trans == BlasTransConj )
|
if ( trans == BlasTransConj && *rows == *cols )
|
||||||
{
|
{
|
||||||
IMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
IMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -153,67 +159,59 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
||||||
if ( trans == BlasNoTrans )
|
if ( trans == BlasNoTrans )
|
||||||
{
|
{
|
||||||
IMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
IMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
if ( trans == BlasConj )
|
if ( trans == BlasConj )
|
||||||
{
|
{
|
||||||
IMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
IMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
if ( trans == BlasTrans )
|
if ( trans == BlasTrans && *rows == *cols )
|
||||||
{
|
{
|
||||||
IMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
IMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
if ( trans == BlasTransConj )
|
if ( trans == BlasTransConj && *rows == *cols )
|
||||||
{
|
{
|
||||||
IMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
IMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda );
|
||||||
}
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if ( *lda > *ldb )
|
msize = (size_t)(*rows) * (*cols) * sizeof(FLOAT) * 2;
|
||||||
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT) * 2;
|
|
||||||
else
|
|
||||||
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT) * 2;
|
|
||||||
|
|
||||||
b = malloc(msize);
|
b = malloc(msize);
|
||||||
if ( b == NULL )
|
if ( b == NULL )
|
||||||
{
|
{
|
||||||
printf("Memory alloc failed in zimatcopy\n");
|
printf("Memory alloc failed\n");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if ( order == BlasColMajor )
|
if ( order == BlasColMajor )
|
||||||
{
|
{
|
||||||
|
|
||||||
if ( trans == BlasNoTrans )
|
if ( trans == BlasNoTrans )
|
||||||
{
|
{
|
||||||
OMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
|
OMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *rows );
|
||||||
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
|
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *rows, a, *ldb );
|
||||||
free(b);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
if ( trans == BlasConj )
|
if ( trans == BlasConj )
|
||||||
{
|
{
|
||||||
OMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
|
OMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *rows );
|
||||||
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
|
OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *rows, a, *ldb );
|
||||||
free(b);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
if ( trans == BlasTrans )
|
if ( trans == BlasTrans )
|
||||||
{
|
{
|
||||||
OMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
|
OMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *cols );
|
||||||
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
|
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *cols, a, *ldb );
|
||||||
free(b);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
if ( trans == BlasTransConj )
|
if ( trans == BlasTransConj )
|
||||||
{
|
{
|
||||||
OMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
|
OMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *cols );
|
||||||
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
|
OMATCOPY_K_CN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *cols, a, *ldb );
|
||||||
free(b);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -222,34 +220,27 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
||||||
|
|
||||||
if ( trans == BlasNoTrans )
|
if ( trans == BlasNoTrans )
|
||||||
{
|
{
|
||||||
OMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
|
OMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *cols );
|
||||||
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
|
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *cols, a, *ldb );
|
||||||
free(b);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
if ( trans == BlasConj )
|
if ( trans == BlasConj )
|
||||||
{
|
{
|
||||||
OMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
|
OMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *cols );
|
||||||
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
|
OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *cols, a, *ldb );
|
||||||
free(b);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
if ( trans == BlasTrans )
|
if ( trans == BlasTrans )
|
||||||
{
|
{
|
||||||
OMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
|
OMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *rows );
|
||||||
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
|
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *rows, a, *ldb );
|
||||||
free(b);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
if ( trans == BlasTransConj )
|
if ( trans == BlasTransConj )
|
||||||
{
|
{
|
||||||
OMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
|
OMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *rows );
|
||||||
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
|
OMATCOPY_K_RN(*cols, *rows, (FLOAT) 1.0, (FLOAT) 0.0 , b, *rows, a, *ldb );
|
||||||
free(b);
|
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
free(b);
|
free(b);
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue