Add ATLAS-style ?geadd function
This commit is contained in:
parent
771b18ae9c
commit
39cc6b21d3
10
cblas.h
10
cblas.h
|
@ -347,6 +347,16 @@ void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum
|
|||
void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
|
||||
/*
 * CBLAS ?geadd extension prototypes (single, double, complex, double complex).
 *
 * Arguments: storage order, row count, column count, alpha, matrix a with
 * leading dimension clda, beta, matrix c with leading dimension cldc.
 * The real variants take alpha and beta by value; the complex variants take
 * them by pointer.
 * NOTE(review): presumably computes c := alpha*a + beta*c (ATLAS-style
 * geadd) -- confirm against the kernel implementation.
 */
void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
|
|
@ -333,6 +333,16 @@ void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, bl
|
|||
blasint clda, blasint cldb);
|
||||
void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a,
|
||||
blasint clda, blasint cldb);
|
||||
|
||||
/*
 * CBLAS ?geadd extension prototypes, variant without const qualifiers.
 *
 * Arguments: storage order, row count, column count, alpha, matrix a with
 * leading dimension clda, beta, matrix c with leading dimension cldc.
 * The real variants take alpha and beta by value; the complex variants take
 * them by pointer.
 * NOTE(review): presumably computes c := alpha*a + beta*c (ATLAS-style
 * geadd) -- confirm against the kernel implementation.
 */
void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta,
|
||||
double *c, blasint cldc);
|
||||
void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta,
|
||||
double *c, blasint cldc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
|
|
@ -220,6 +220,7 @@
|
|||
#define COMATCOPY_K_CTC comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC comatcopy_k_rtc
|
||||
|
||||
#define CGEADD_K cgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
|
@ -402,6 +403,7 @@
|
|||
#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc
|
||||
#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc
|
||||
#define CGEADD_K gotoblas -> cgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -149,6 +149,7 @@
|
|||
#define DOMATCOPY_K_RN domatcopy_k_rn
|
||||
#define DOMATCOPY_K_CT domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT domatcopy_k_rt
|
||||
#define DGEADD_K dgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
|
@ -267,6 +268,8 @@
|
|||
#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt
|
||||
|
||||
#define DGEADD_K gotoblas -> dgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
#define DGEMM_NN dgemm_nn
|
||||
|
|
|
@ -754,6 +754,12 @@ void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, do
|
|||
void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *);
|
||||
void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *);
|
||||
|
||||
/*
 * Fortran-callable ?geadd entry points; every argument is passed by pointer.
 * Argument order, as used by the definitions in geadd.c / zgeadd.c:
 * M, N, ALPHA, A, LDA, BETA, C, LDC.
 */
void BLASFUNC(sgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
|
||||
void BLASFUNC(dgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
|
||||
void BLASFUNC(cgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
|
||||
void BLASFUNC(zgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
|
|
|
@ -1762,6 +1762,11 @@ int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, dou
|
|||
int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
/*
 * Kernel-level GEADD prototypes.
 * Real variants:    (rows, cols, alpha, a, lda, beta, b, ldb).
 * Complex variants: (rows, cols, alphar, alphai, a, lda, betar, betai, b, ldb),
 * i.e. alpha and beta are each passed as two scalars.
 */
int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG);
|
||||
int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG);
|
||||
int cgeadd_k(BLASLONG, BLASLONG, float, float, float*, BLASLONG, float, float, float *, BLASLONG);
|
||||
int zgeadd_k(BLASLONG, BLASLONG, double,double, double*, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
}
|
||||
|
|
|
@ -634,7 +634,7 @@
|
|||
#define OMATCOPY_K_RN DOMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT DOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT DOMATCOPY_K_RT
|
||||
|
||||
#define GEADD_K DGEADD_K
|
||||
#else
|
||||
|
||||
#define AMAX_K SAMAX_K
|
||||
|
@ -932,6 +932,7 @@
|
|||
#define OMATCOPY_K_CT SOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT SOMATCOPY_K_RT
|
||||
|
||||
#define GEADD_K SGEADD_K
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
|
@ -1746,6 +1747,7 @@
|
|||
#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC
|
||||
#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC
|
||||
#define GEADD_K ZGEADD_K
|
||||
|
||||
#else
|
||||
|
||||
|
@ -2159,6 +2161,8 @@
|
|||
#define OMATCOPY_K_CTC COMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC COMATCOPY_K_RTC
|
||||
|
||||
#define GEADD_K CGEADD_K
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -855,6 +855,10 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
|
||||
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
|
||||
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
|
||||
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
|
||||
int (*zgeadd_k) (BLASLONG, BLASLONG, float, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
} gotoblas_t;
|
||||
|
||||
|
|
|
@ -153,6 +153,7 @@
|
|||
#define SOMATCOPY_K_CT somatcopy_k_ct
|
||||
#define SOMATCOPY_K_RT somatcopy_k_rt
|
||||
|
||||
#define SGEADD_K sgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
|
@ -274,6 +275,7 @@
|
|||
#define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct
|
||||
#define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt
|
||||
|
||||
#define SGEADD_K gotoblas -> sgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -220,6 +220,7 @@
|
|||
#define ZOMATCOPY_K_CTC zomatcopy_k_ctc
|
||||
#define ZOMATCOPY_K_RTC zomatcopy_k_rtc
|
||||
|
||||
#define ZGEADD_K zgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
|
@ -403,6 +404,8 @@
|
|||
#define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc
|
||||
#define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc
|
||||
|
||||
/* Resolve through the gotoblas table, like the neighbouring ZOMATCOPY_K_* macros in this branch. */
#define ZGEADD_K gotoblas -> zgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
#define ZGEMM_NN zgemm_nn
|
||||
|
|
|
@ -23,7 +23,8 @@
|
|||
zhpr,zrotg,zscal,zswap,zsymm,zsyr2k,zsyrk,ztbmv,
|
||||
ztbsv,ztpmv,ztpsv,ztrmm,ztrmv,ztrsm,ztrsv, zsymv,
|
||||
xerbla,
|
||||
saxpby,daxpby,caxpby,zaxpby
|
||||
saxpby,daxpby,caxpby,zaxpby,
|
||||
sgeadd,dgeadd,cgeadd,zgeadd,
|
||||
);
|
||||
|
||||
@cblasobjs = (
|
||||
|
@ -55,6 +56,7 @@
|
|||
cblas_saxpby,cblas_daxpby,cblas_caxpby,cblas_zaxpby,
|
||||
cblas_somatcopy, cblas_domatcopy, cblas_comatcopy, cblas_zomatcopy,
|
||||
cblas_simatcopy, cblas_dimatcopy, cblas_cimatcopy, cblas_zimatcopy,
|
||||
cblas_sgeadd, cblas_dgeadd,cblas_cgeadd, cblas_zgeadd
|
||||
);
|
||||
|
||||
@exblasobjs = (
|
||||
|
|
|
@ -43,7 +43,8 @@ SBLAS2OBJS = \
|
|||
SBLAS3OBJS = \
|
||||
sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \
|
||||
strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \
|
||||
somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)
|
||||
somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\
|
||||
sgeadd.$(SUFFIX)
|
||||
|
||||
|
||||
DBLAS1OBJS = \
|
||||
|
@ -68,7 +69,8 @@ DBLAS2OBJS = \
|
|||
DBLAS3OBJS = \
|
||||
dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \
|
||||
dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \
|
||||
domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)
|
||||
domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX)\
|
||||
dgeadd.$(SUFFIX)
|
||||
|
||||
CBLAS1OBJS = \
|
||||
caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \
|
||||
|
@ -96,7 +98,8 @@ CBLAS3OBJS = \
|
|||
cgemm.$(SUFFIX) csymm.$(SUFFIX) ctrmm.$(SUFFIX) \
|
||||
ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \
|
||||
chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \
|
||||
comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)
|
||||
comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX)\
|
||||
cgeadd.$(SUFFIX)
|
||||
|
||||
ZBLAS1OBJS = \
|
||||
zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \
|
||||
|
@ -124,7 +127,8 @@ ZBLAS3OBJS = \
|
|||
zgemm.$(SUFFIX) zsymm.$(SUFFIX) ztrmm.$(SUFFIX) \
|
||||
ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \
|
||||
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
|
||||
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)
|
||||
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)\
|
||||
zgeadd.$(SUFFIX)
|
||||
|
||||
ifeq ($(SUPPORT_GEMM3M), 1)
|
||||
|
||||
|
@ -269,7 +273,8 @@ CSBLAS2OBJS = \
|
|||
|
||||
CSBLAS3OBJS = \
|
||||
cblas_sgemm.$(SUFFIX) cblas_ssymm.$(SUFFIX) cblas_strmm.$(SUFFIX) cblas_strsm.$(SUFFIX) \
|
||||
cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)
|
||||
cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\
|
||||
cblas_sgeadd.$(SUFFIX)
|
||||
|
||||
CDBLAS1OBJS = \
|
||||
cblas_idamax.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \
|
||||
|
@ -285,7 +290,8 @@ CDBLAS2OBJS = \
|
|||
|
||||
CDBLAS3OBJS += \
|
||||
cblas_dgemm.$(SUFFIX) cblas_dsymm.$(SUFFIX) cblas_dtrmm.$(SUFFIX) cblas_dtrsm.$(SUFFIX) \
|
||||
cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX)
|
||||
cblas_dsyrk.$(SUFFIX) cblas_dsyr2k.$(SUFFIX) cblas_domatcopy.$(SUFFIX) cblas_dimatcopy.$(SUFFIX) \
|
||||
cblas_dgeadd.$(SUFFIX)
|
||||
|
||||
CCBLAS1OBJS = \
|
||||
cblas_icamax.$(SUFFIX) cblas_scasum.$(SUFFIX) cblas_caxpy.$(SUFFIX) \
|
||||
|
@ -308,7 +314,9 @@ CCBLAS3OBJS = \
|
|||
cblas_cgemm.$(SUFFIX) cblas_csymm.$(SUFFIX) cblas_ctrmm.$(SUFFIX) cblas_ctrsm.$(SUFFIX) \
|
||||
cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \
|
||||
cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \
|
||||
cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)
|
||||
cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\
|
||||
cblas_cgeadd.$(SUFFIX)
|
||||
|
||||
|
||||
|
||||
CZBLAS1OBJS = \
|
||||
|
@ -332,7 +340,9 @@ CZBLAS3OBJS = \
|
|||
cblas_zgemm.$(SUFFIX) cblas_zsymm.$(SUFFIX) cblas_ztrmm.$(SUFFIX) cblas_ztrsm.$(SUFFIX) \
|
||||
cblas_zsyrk.$(SUFFIX) cblas_zsyr2k.$(SUFFIX) \
|
||||
cblas_zhemm.$(SUFFIX) cblas_zherk.$(SUFFIX) cblas_zher2k.$(SUFFIX)\
|
||||
cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX)
|
||||
cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \
|
||||
cblas_zgeadd.$(SUFFIX)
|
||||
|
||||
|
||||
ifeq ($(SUPPORT_GEMM3M), 1)
|
||||
|
||||
|
@ -2103,4 +2113,27 @@ zimatcopy.$(SUFFIX) zimatcopy.$(PSUFFIX) : zimatcopy.c
|
|||
cblas_zimatcopy.$(SUFFIX) cblas_zimatcopy.$(PSUFFIX) : zimatcopy.c
|
||||
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
|
||||
|
||||
sgeadd.$(SUFFIX) sgeadd.$(PSUFFIX) : geadd.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dgeadd.$(SUFFIX) dgeadd.$(PSUFFIX) : geadd.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cgeadd.$(SUFFIX) cgeadd.$(PSUFFIX) : zgeadd.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zgeadd.$(SUFFIX) zgeadd.$(PSUFFIX) : zgeadd.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cblas_sgeadd.$(SUFFIX) cblas_sgeadd.$(PSUFFIX) : geadd.c
|
||||
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
|
||||
|
||||
cblas_dgeadd.$(SUFFIX) cblas_dgeadd.$(PSUFFIX) : geadd.c
|
||||
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
|
||||
|
||||
cblas_cgeadd.$(SUFFIX) cblas_cgeadd.$(PSUFFIX) : zgeadd.c
|
||||
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
|
||||
|
||||
cblas_zgeadd.$(SUFFIX) cblas_zgeadd.$(PSUFFIX) : zgeadd.c
|
||||
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)
|
||||
|
||||
|
|
|
@ -0,0 +1,148 @@
|
|||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define ERROR_NAME "DGEADD "
|
||||
#else
|
||||
#define ERROR_NAME "SGEADD "
|
||||
#endif
|
||||
|
||||
#ifndef CBLAS
|
||||
|
||||
void NAME(blasint *M, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA,
|
||||
FLOAT *BETA, FLOAT *c, blasint *LDC)
|
||||
{
|
||||
|
||||
blasint m = *M;
|
||||
blasint n = *N;
|
||||
blasint lda = *LDA;
|
||||
blasint ldc = *LDC;
|
||||
FLOAT alpha = *ALPHA;
|
||||
FLOAT beta = *BETA;
|
||||
|
||||
blasint info;
|
||||
|
||||
PRINT_DEBUG_NAME;
|
||||
|
||||
info = 0;
|
||||
|
||||
|
||||
if (lda < MAX(1, m)) info = 6;
|
||||
if (ldc < MAX(1, m)) info = 8;
|
||||
|
||||
if (n < 0) info = 2;
|
||||
if (m < 0) info = 1;
|
||||
|
||||
if (info != 0){
|
||||
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
return;
|
||||
}
|
||||
|
||||
#else
|
||||
void CNAME( enum CBLAS_ORDER order, blasint m, blasint n, FLOAT alpha, FLOAT *a, blasint lda, FLOAT beta,
|
||||
FLOAT *c, blasint ldc)
|
||||
{
|
||||
/*
|
||||
void CNAME(enum CBLAS_ORDER order,
|
||||
blasint m, blasint n,
|
||||
FLOAT alpha,
|
||||
FLOAT *a, blasint lda,
|
||||
FLOAT beta,
|
||||
FLOAT *c, blasint ldc){ */
|
||||
|
||||
blasint info, t;
|
||||
|
||||
PRINT_DEBUG_CNAME;
|
||||
|
||||
info = 0;
|
||||
|
||||
if (order == CblasColMajor) {
|
||||
|
||||
info = -1;
|
||||
|
||||
if (ldc < MAX(1, m)) info = 8;
|
||||
if (lda < MAX(1, m)) info = 5;
|
||||
if (n < 0) info = 2;
|
||||
if (m < 0) info = 1;
|
||||
|
||||
}
|
||||
|
||||
if (order == CblasRowMajor) {
|
||||
info = -1;
|
||||
|
||||
t = n;
|
||||
n = m;
|
||||
m = t;
|
||||
|
||||
if (ldc < MAX(1, m)) info = 8;
|
||||
if (lda < MAX(1, m)) info = 5;
|
||||
if (n < 0) info = 2;
|
||||
if (m < 0) info = 1;
|
||||
}
|
||||
|
||||
if (info >= 0) {
|
||||
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
return;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if ((m==0) || (n==0)) return;
|
||||
|
||||
|
||||
IDEBUG_START;
|
||||
|
||||
FUNCTION_PROFILE_START();
|
||||
|
||||
|
||||
GEADD_K(m,n,alpha, a, lda, beta, c, ldc);
|
||||
|
||||
|
||||
FUNCTION_PROFILE_END(1, 2* m * n , 2 * m * n);
|
||||
|
||||
IDEBUG_END;
|
||||
|
||||
return;
|
||||
|
||||
}
|
|
@ -0,0 +1,146 @@
|
|||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define ERROR_NAME "ZGEADD "
|
||||
#else
|
||||
#define ERROR_NAME "CGEADD "
|
||||
#endif
|
||||
|
||||
#ifndef CBLAS
|
||||
|
||||
void NAME(blasint *M, blasint *N, FLOAT *ALPHA, FLOAT *a, blasint *LDA,
|
||||
FLOAT *BETA, FLOAT *c, blasint *LDC)
|
||||
{
|
||||
|
||||
blasint m = *M;
|
||||
blasint n = *N;
|
||||
blasint lda = *LDA;
|
||||
blasint ldc = *LDC;
|
||||
|
||||
blasint info;
|
||||
|
||||
PRINT_DEBUG_NAME;
|
||||
|
||||
info = 0;
|
||||
|
||||
|
||||
if (lda < MAX(1, m)) info = 6;
|
||||
if (ldc < MAX(1, m)) info = 8;
|
||||
|
||||
if (n < 0) info = 2;
|
||||
if (m < 0) info = 1;
|
||||
|
||||
if (info != 0){
|
||||
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
return;
|
||||
}
|
||||
|
||||
#else
|
||||
void CNAME( enum CBLAS_ORDER order, blasint m, blasint n, FLOAT *ALPHA, FLOAT *a, blasint lda, FLOAT *BETA,
|
||||
FLOAT *c, blasint ldc)
|
||||
{
|
||||
/*
|
||||
void CNAME(enum CBLAS_ORDER order,
|
||||
blasint m, blasint n,
|
||||
FLOAT alpha,
|
||||
FLOAT *a, blasint lda,
|
||||
FLOAT beta,
|
||||
FLOAT *c, blasint ldc){ */
|
||||
|
||||
blasint info, t;
|
||||
|
||||
PRINT_DEBUG_CNAME;
|
||||
|
||||
info = 0;
|
||||
|
||||
if (order == CblasColMajor) {
|
||||
|
||||
info = -1;
|
||||
|
||||
if (ldc < MAX(1, m)) info = 8;
|
||||
if (lda < MAX(1, m)) info = 5;
|
||||
if (n < 0) info = 2;
|
||||
if (m < 0) info = 1;
|
||||
|
||||
}
|
||||
|
||||
if (order == CblasRowMajor) {
|
||||
info = -1;
|
||||
|
||||
t = n;
|
||||
n = m;
|
||||
m = t;
|
||||
|
||||
if (ldc < MAX(1, m)) info = 8;
|
||||
if (lda < MAX(1, m)) info = 5;
|
||||
if (n < 0) info = 2;
|
||||
if (m < 0) info = 1;
|
||||
}
|
||||
|
||||
if (info >= 0) {
|
||||
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||
return;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if ((m==0) || (n==0)) return;
|
||||
|
||||
|
||||
IDEBUG_START;
|
||||
|
||||
FUNCTION_PROFILE_START();
|
||||
|
||||
|
||||
GEADD_K(m,n,ALPHA[0],ALPHA[1], a, lda, BETA[0], BETA[1], c, ldc);
|
||||
|
||||
|
||||
FUNCTION_PROFILE_END(1, 2* m * n , 2 * m * n);
|
||||
|
||||
IDEBUG_END;
|
||||
|
||||
return;
|
||||
|
||||
}
|
|
@ -329,23 +329,27 @@ endif
|
|||
###### BLAS extensions #####
|
||||
SBLASOBJS += \
|
||||
somatcopy_k_cn$(TSUFFIX).$(SUFFIX) somatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
|
||||
somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX)
|
||||
somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
|
||||
sgeadd_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DBLASOBJS += \
|
||||
domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
|
||||
domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX)
|
||||
domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
|
||||
dgeadd_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CBLASOBJS += \
|
||||
comatcopy_k_cn$(TSUFFIX).$(SUFFIX) comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
|
||||
comatcopy_k_ct$(TSUFFIX).$(SUFFIX) comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
|
||||
comatcopy_k_cnc$(TSUFFIX).$(SUFFIX) comatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \
|
||||
comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) comatcopy_k_rtc$(TSUFFIX).$(SUFFIX)
|
||||
comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \
|
||||
cgeadd_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZBLASOBJS += \
|
||||
zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
|
||||
zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
|
||||
zomatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \
|
||||
zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX)
|
||||
zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \
|
||||
zgeadd_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
@ -3440,3 +3444,31 @@ $(KDIR)zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RTC)
|
|||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@
|
||||
|
||||
|
||||
ifndef SGEADD_K
|
||||
SGEADD_K = ../arm/geadd.c
|
||||
endif
|
||||
|
||||
$(KDIR)sgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEADD_K)
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@
|
||||
|
||||
# Double-precision real GEADD kernel: fall back to the generic C source
# unless the target's kernel configuration already set DGEADD_K.
ifndef DGEADD_K
DGEADD_K = ../arm/geadd.c
endif

# Build from DGEADD_K so that a per-target override is honoured.
$(KDIR)dgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DGEADD_K)
	$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@
|
||||
|
||||
ifndef CGEADD_K
|
||||
CGEADD_K = ../arm/zgeadd.c
|
||||
endif
|
||||
|
||||
$(KDIR)cgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(CGEADD_K)
|
||||
$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM $< -o $@
|
||||
|
||||
ifndef ZGEADD_K
|
||||
ZGEADD_K = ../arm/zgeadd.c
|
||||
endif
|
||||
|
||||
$(KDIR)zgeadd_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEADD_K)
|
||||
$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM $< -o $@
|
||||
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/*
 * Generic GEADD kernel for the real precisions.
 *
 * Walks the matrices one column at a time: column j of a starts at
 * a + j*lda and column j of b at b + j*ldb, each holding `rows`
 * unit-stride elements.
 *
 *   alpha == 0 : a is not read; SCAL_K is applied to each column of b
 *                with beta.
 *   otherwise  : AXPBY_K(rows, alpha, a_col, 1, beta, b_col, 1) is applied
 *                to each pair of columns.
 *
 * Always returns 0, including for empty matrices.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT beta, FLOAT *b, BLASLONG ldb)
{
	BLASLONG col;
	FLOAT *src = a;
	FLOAT *dst = b;

	/* Nothing to do for an empty matrix. */
	if ( rows <= 0 || cols <= 0 ) return(0);

	if ( alpha != 0.0 )
	{
		for ( col = 0; col < cols; col++ )
		{
			AXPBY_K(rows, alpha, src, 1, beta, dst, 1);
			src += lda;
			dst += ldb;
		}
	}
	else
	{
		/* a does not take part in this branch: only b is touched. */
		for ( col = 0; col < cols; col++ )
		{
			SCAL_K(rows, 0,0, beta, dst, 1, NULL, 0,NULL,0);
			dst += ldb;
		}
	}

	return(0);
}
|
||||
|
||||
|
|
@ -0,0 +1,65 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/*
 * Generic GEADD kernel for the complex precisions.
 *
 * alpha and beta are passed as two scalars each (alphar/alphai,
 * betar/betai). The leading dimensions are doubled before being used as
 * pointer steps between columns (presumably because every complex element
 * occupies two FLOATs -- confirm).
 *
 *   alpha == 0 (both parts) : a is not read; SCAL_K is applied to each
 *                             column of b with (betar, betai).
 *   otherwise               : AXPBY_K is applied to each pair of columns.
 *
 * Always returns 0, including for empty matrices.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alphar, FLOAT alphai, FLOAT *a, BLASLONG lda, FLOAT betar, FLOAT betai , FLOAT *b, BLASLONG ldb)
{
	BLASLONG col;
	BLASLONG src_step, dst_step;
	FLOAT *src = a;
	FLOAT *dst = b;

	/* Nothing to do for an empty matrix. */
	if ( rows <= 0 || cols <= 0 ) return(0);

	/* Column-to-column distance measured in FLOATs. */
	src_step = lda * 2;
	dst_step = ldb * 2;

	if ( alphar == 0.0 && alphai == 0.0 )
	{
		/* a does not take part in this branch: only b is touched. */
		for ( col = 0; col < cols; col++ )
		{
			SCAL_K(rows, 0,0, betar, betai, dst, 1, NULL, 0,NULL,0);
			dst += dst_step;
		}
	}
	else
	{
		for ( col = 0; col < cols; col++ )
		{
			AXPBY_K(rows, alphar, alphai, src, 1, betar, betai, dst, 1);
			src += src_step;
			dst += dst_step;
		}
	}

	return(0);
}
|
||||
|
||||
|
Loading…
Reference in New Issue