Ref #51: added blas extensions simatcopy, dimatcopy, cimatcopy, zimatcopy
This commit is contained in:
parent
cee257f384
commit
faeab93df0
|
@ -769,6 +769,11 @@ void BLASFUNC(domatcopy) (char *, char *, blasint *, blasint *, double *, do
|
|||
void BLASFUNC(comatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zomatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
|
||||
void BLASFUNC(simatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *);
|
||||
void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *);
|
||||
void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *);
|
||||
void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@ SBLAS2OBJS = \
|
|||
SBLAS3OBJS = \
|
||||
sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \
|
||||
strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \
|
||||
somatcopy.$(SUFFIX)
|
||||
somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)
|
||||
|
||||
|
||||
DBLAS1OBJS = \
|
||||
|
@ -66,7 +66,7 @@ DBLAS2OBJS = \
|
|||
DBLAS3OBJS = \
|
||||
dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \
|
||||
dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \
|
||||
domatcopy.$(SUFFIX)
|
||||
domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)
|
||||
|
||||
CBLAS1OBJS = \
|
||||
caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \
|
||||
|
@ -94,7 +94,7 @@ CBLAS3OBJS = \
|
|||
cgemm.$(SUFFIX) csymm.$(SUFFIX) ctrmm.$(SUFFIX) \
|
||||
ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \
|
||||
chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \
|
||||
comatcopy.$(SUFFIX)
|
||||
comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)
|
||||
|
||||
ZBLAS1OBJS = \
|
||||
zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \
|
||||
|
@ -122,7 +122,7 @@ ZBLAS3OBJS = \
|
|||
zgemm.$(SUFFIX) zsymm.$(SUFFIX) ztrmm.$(SUFFIX) \
|
||||
ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \
|
||||
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
|
||||
zomatcopy.$(SUFFIX)
|
||||
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)
|
||||
|
||||
ifdef SUPPORT_GEMM3M
|
||||
|
||||
|
@ -2032,13 +2032,25 @@ cblas_caxpby.$(SUFFIX) cblas_caxpby.$(PSUFFIX) : zaxpby.c
|
|||
domatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : omatcopy.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
somatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : omatcopy.c
|
||||
somatcopy.$(SUFFIX) somatcopy.$(PSUFFIX) : omatcopy.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
comatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : zomatcopy.c
|
||||
comatcopy.$(SUFFIX) comatcopy.$(PSUFFIX) : zomatcopy.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zomatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : zomatcopy.c
|
||||
zomatcopy.$(SUFFIX) zomatcopy.$(PSUFFIX) : zomatcopy.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dimatcopy.$(SUFFIX) dimatcopy.$(PSUFFIX) : imatcopy.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
simatcopy.$(SUFFIX) simatcopy.$(PSUFFIX) : imatcopy.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cimatcopy.$(SUFFIX) cimatcopy.$(PSUFFIX) : zimatcopy.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zimatcopy.$(SUFFIX) zimatcopy.$(PSUFFIX) : zimatcopy.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,142 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/***********************************************************
|
||||
* 2014/06/10 Saar
|
||||
***********************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "common.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define ERROR_NAME "DIMATCOPY"
|
||||
#else
|
||||
#define ERROR_NAME "SIMATCOPY"
|
||||
#endif
|
||||
|
||||
#define BlasRowMajor 0
|
||||
#define BlasColMajor 1
|
||||
#define BlasNoTrans 0
|
||||
#define BlasTrans 1
|
||||
|
||||
#undef malloc
|
||||
#undef free
|
||||
|
||||
/*
 * In-place scaled copy / transpose of a real matrix (SIMATCOPY or DIMATCOPY,
 * selected by DOUBLE through ERROR_NAME / NAME).
 *
 * The operation is done out-of-place into a temporary buffer `b` with the
 * OMATCOPY_K_* kernels and the result is then copied back over `a`.
 *
 * Parameters (all passed by reference, Fortran style):
 *   ORDER  'C' (column major) or 'R' (row major), case-insensitive.
 *   TRANS  'N' or 'R' -> no transpose, 'T' or 'C' -> transpose
 *          (for real data the conjugating variants map onto the plain ones).
 *   rows   number of rows of the input matrix, must be > 0.
 *   cols   number of columns of the input matrix, must be > 0.
 *   alpha  scale factor forwarded to the first copy kernel.
 *   a      matrix storage, read with leading dimension *lda and rewritten
 *          with leading dimension *ldb.
 *   lda    input leading dimension  (>= rows for 'C', >= cols for 'R').
 *   ldb    output leading dimension (must hold the leading extent of the
 *          result, i.e. of the transposed shape when TRANS transposes).
 *
 * Errors: an invalid argument is reported through xerbla with the 1-based
 * position of the offending argument and the routine returns without
 * touching `a`.  A failed allocation prints a message and calls exit(1).
 *
 * NOTE(review): the kernel calls assume OMATCOPY_K_xy(rows, cols, alpha,
 * src, ld_src, dst, ld_dst) takes the SOURCE shape as (rows, cols) -- confirm
 * against the kernel definitions reached through common.h.
 */
void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb)
{

	char Order, Trans;
	int order=-1,trans=-1;
	blasint info = -1;
	blasint out_rows, out_cols;
	FLOAT *b = NULL;
	size_t ld_out, nvec;

	Order = *ORDER;
	Trans = *TRANS;

	TOUPPER(Order);
	TOUPPER(Trans);

	if ( Order == 'C' ) order = BlasColMajor;
	if ( Order == 'R' ) order = BlasRowMajor;
	if ( Trans == 'N' || Trans == 'R' ) trans = BlasNoTrans;
	if ( Trans == 'T' || Trans == 'C' ) trans = BlasTrans;

	/*
	 * Checks run from the last argument to the first; a later assignment
	 * overrides an earlier one, so the lowest-numbered bad argument is the
	 * one reported.  ldb is argument 8 of this routine (there is no separate
	 * output matrix argument as in ?omatcopy).
	 */
	if ( order == BlasColMajor)
	{
		if ( trans == BlasNoTrans && *ldb < *rows ) info = 8;
		if ( trans == BlasTrans && *ldb < *cols ) info = 8;
	}
	if ( order == BlasRowMajor)
	{
		if ( trans == BlasNoTrans && *ldb < *cols ) info = 8;
		if ( trans == BlasTrans && *ldb < *rows ) info = 8;
	}

	if ( order == BlasColMajor && *lda < *rows ) info = 7;
	if ( order == BlasRowMajor && *lda < *cols ) info = 7;
	if ( *cols <= 0 ) info = 4;
	if ( *rows <= 0 ) info = 3;
	if ( trans < 0 ) info = 2;
	if ( order < 0 ) info = 1;

	if (info >= 0) {
		BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
		return;
	}

	/* Shape of the result held in the scratch buffer. */
	out_rows = ( trans == BlasNoTrans ) ? *rows : *cols;
	out_cols = ( trans == BlasNoTrans ) ? *cols : *rows;

	/*
	 * The scratch matrix consists of at most max(rows, cols) vectors of
	 * *ldb elements each.  Sizing it from the leading dimensions alone is
	 * not enough when the other extent is larger.  All arithmetic is done
	 * in size_t; *ldb >= 1 here because it passed the checks above with
	 * rows, cols > 0, so the division below is safe.
	 */
	ld_out = (size_t) *ldb;
	nvec = (size_t) ( ( *rows > *cols ) ? *rows : *cols );

	/* Only allocate when ld_out * nvec * sizeof(FLOAT) fits in size_t. */
	if ( nvec <= ((size_t) -1) / sizeof(FLOAT) / ld_out )
	{
		b = malloc(ld_out * nvec * sizeof(FLOAT));
	}

	if ( b == NULL )
	{
		printf("Memory alloc failed\n");
		exit(1);
	}

	if ( order == BlasColMajor )
	{
		if ( trans == BlasNoTrans )
		{
			OMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda, b, *ldb );
		}
		else
		{
			OMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda, b, *ldb );
		}

		/* Copy the (possibly transposed) result back, unscaled. */
		OMATCOPY_K_CN(out_rows, out_cols, (FLOAT) 1.0, b, *ldb, a, *ldb );
	}
	else
	{
		if ( trans == BlasNoTrans )
		{
			OMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda, b, *ldb );
		}
		else
		{
			OMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda, b, *ldb );
		}

		/* Copy the (possibly transposed) result back, unscaled. */
		OMATCOPY_K_RN(out_rows, out_cols, (FLOAT) 1.0, b, *ldb, a, *ldb );
	}

	free(b);
	return;

}
|
||||
|
||||
|
|
@ -0,0 +1,185 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/***********************************************************
|
||||
* 2014/06/10 Saar
|
||||
***********************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "common.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define ERROR_NAME "ZIMATCOPY"
|
||||
#else
|
||||
#define ERROR_NAME "CIMATCOPY"
|
||||
#endif
|
||||
|
||||
#define BlasRowMajor 0
|
||||
#define BlasColMajor 1
|
||||
#define BlasNoTrans 0
|
||||
#define BlasTrans 1
|
||||
#define BlasTransConj 2
|
||||
#define BlasConj 3
|
||||
|
||||
/*
 * In-place scaled copy / transpose / conjugate of a complex matrix
 * (CIMATCOPY or ZIMATCOPY, selected by DOUBLE through ERROR_NAME / NAME).
 *
 * The operation is done out-of-place into a temporary buffer `b` with the
 * OMATCOPY_K_* kernels and the result is then copied back over `a`.
 *
 * Parameters (all passed by reference, Fortran style):
 *   ORDER  'C' (column major) or 'R' (row major), case-insensitive.
 *   TRANS  'N' no transpose, 'T' transpose, 'C' conjugate transpose,
 *          'R' conjugate without transpose; case-insensitive.
 *   rows   number of rows of the input matrix, must be > 0.
 *   cols   number of columns of the input matrix, must be > 0.
 *   alpha  two FLOATs, forwarded to the kernel as alpha[0] and alpha[1]
 *          (presumably real and imaginary part -- confirm with the kernels).
 *   a      matrix storage, two FLOATs per element; read with leading
 *          dimension *lda and rewritten with leading dimension *ldb.
 *   lda    input leading dimension  (>= rows for 'C', >= cols for 'R').
 *   ldb    output leading dimension (must hold the leading extent of the
 *          result, i.e. of the transposed shape when TRANS transposes).
 *
 * Errors: an invalid argument is reported through xerbla with the 1-based
 * position of the offending argument and the routine returns without
 * touching `a`.  A failed allocation prints a message and calls exit(1).
 *
 * NOTE(review): the kernel calls assume OMATCOPY_K_xy(rows, cols, alpha_r,
 * alpha_i, src, ld_src, dst, ld_dst) takes the SOURCE shape as (rows, cols)
 * -- confirm against the kernel definitions reached through common.h.
 */
void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb)
{

	char Order, Trans;
	int order=-1,trans=-1;
	int transposed;
	blasint info = -1;
	blasint out_rows, out_cols;
	FLOAT *b = NULL;
	size_t ld_out, nvec;

	Order = *ORDER;
	Trans = *TRANS;

	TOUPPER(Order);
	TOUPPER(Trans);

	if ( Order == 'C' ) order = BlasColMajor;
	if ( Order == 'R' ) order = BlasRowMajor;
	if ( Trans == 'N' ) trans = BlasNoTrans;
	if ( Trans == 'T' ) trans = BlasTrans;
	if ( Trans == 'C' ) trans = BlasTransConj;
	if ( Trans == 'R' ) trans = BlasConj;

	/* Does the requested operation swap rows and columns? */
	transposed = ( trans == BlasTrans || trans == BlasTransConj );

	/*
	 * Checks run from the last argument to the first; a later assignment
	 * overrides an earlier one, so the lowest-numbered bad argument is the
	 * one reported.  ldb is argument 8 of this routine (there is no separate
	 * output matrix argument as in ?omatcopy).
	 */
	if ( order == BlasColMajor)
	{
		if ( trans == BlasNoTrans && *ldb < *rows ) info = 8;
		if ( trans == BlasConj && *ldb < *rows ) info = 8;
		if ( trans == BlasTrans && *ldb < *cols ) info = 8;
		if ( trans == BlasTransConj && *ldb < *cols ) info = 8;
	}
	if ( order == BlasRowMajor)
	{
		if ( trans == BlasNoTrans && *ldb < *cols ) info = 8;
		if ( trans == BlasConj && *ldb < *cols ) info = 8;
		if ( trans == BlasTrans && *ldb < *rows ) info = 8;
		if ( trans == BlasTransConj && *ldb < *rows ) info = 8;
	}

	if ( order == BlasColMajor && *lda < *rows ) info = 7;
	if ( order == BlasRowMajor && *lda < *cols ) info = 7;
	if ( *cols <= 0 ) info = 4;
	if ( *rows <= 0 ) info = 3;
	if ( trans < 0 ) info = 2;
	if ( order < 0 ) info = 1;

	if (info >= 0) {
		BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
		return;
	}

	/* Shape of the result held in the scratch buffer. */
	out_rows = transposed ? *cols : *rows;
	out_cols = transposed ? *rows : *cols;

	/*
	 * The scratch matrix consists of at most max(rows, cols) vectors of
	 * *ldb complex elements (2 FLOATs each).  Sizing it from the leading
	 * dimensions alone is not enough when the other extent is larger.  All
	 * arithmetic is done in size_t; *ldb >= 1 here because it passed the
	 * checks above with rows, cols > 0, so the division below is safe.
	 */
	ld_out = (size_t) *ldb;
	nvec = (size_t) ( ( *rows > *cols ) ? *rows : *cols );

	/* Only allocate when ld_out * nvec * 2 * sizeof(FLOAT) fits in size_t. */
	if ( nvec <= ((size_t) -1) / ( sizeof(FLOAT) * 2 ) / ld_out )
	{
		b = malloc(ld_out * nvec * sizeof(FLOAT) * 2);
	}

	if ( b == NULL )
	{
		printf("Memory alloc failed\n");
		exit(1);
	}

	if ( order == BlasColMajor )
	{
		if ( trans == BlasNoTrans )
		{
			OMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
		}
		else if ( trans == BlasConj )
		{
			OMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
		}
		else if ( trans == BlasTrans )
		{
			OMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
		}
		else
		{
			OMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
		}

		/* Copy the (possibly transposed) result back, unscaled. */
		OMATCOPY_K_CN(out_rows, out_cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
	}
	else
	{
		if ( trans == BlasNoTrans )
		{
			OMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
		}
		else if ( trans == BlasConj )
		{
			OMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
		}
		else if ( trans == BlasTrans )
		{
			OMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
		}
		else
		{
			OMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
		}

		/* Copy the (possibly transposed) result back, unscaled. */
		OMATCOPY_K_RN(out_rows, out_cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
	}

	/* Single exit: the scratch buffer is released on every path. */
	free(b);
	return;

}
|
||||
|
||||
|
Loading…
Reference in New Issue