diff --git a/cblas.h b/cblas.h index 971c132dd..20445858c 100644 --- a/cblas.h +++ b/cblas.h @@ -305,6 +305,16 @@ void cblas_zher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBL void cblas_xerbla(blasint p, char *rout, char *form, ...); +/*** BLAS extensions ***/ + +void cblas_saxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy); + +void cblas_daxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double beta, double *y, OPENBLAS_CONST blasint incy); + +void cblas_caxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *alpha, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST float *beta, float *y, OPENBLAS_CONST blasint incy); + +void cblas_zaxpby(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *alpha, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx,OPENBLAS_CONST double *beta, double *y, OPENBLAS_CONST blasint incy); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/cblas_noconst.h b/cblas_noconst.h index fd2e940c0..002c46b76 100644 --- a/cblas_noconst.h +++ b/cblas_noconst.h @@ -296,6 +296,17 @@ void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANS void cblas_xerbla(blasint p, char *rout, char *form, ...); +/*** BLAS extensions ***/ + +void cblas_saxpby(blasint n, float alpha, float *x, blasint incx,float beta, float *y, blasint incy); + +void cblas_daxpby(blasint n, double alpha, double *x, blasint incx,double beta, double *y, blasint incy); + +void cblas_caxpby(blasint n, float *alpha, float *x, blasint incx,float *beta, float *y, blasint incy); + +void cblas_zaxpby(blasint n, double *alpha, double *x, blasint incx,double *beta, double *y, blasint incy); + + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/common_c.h b/common_c.h index f78f17213..724d1e261 100644 --- 
a/common_c.h +++ b/common_c.h @@ -209,6 +209,18 @@ #define CNEG_TCOPY cneg_tcopy #define CLASWP_NCOPY claswp_ncopy +#define CAXPBY_K caxpby_k + +#define COMATCOPY_K_CN comatcopy_k_cn +#define COMATCOPY_K_RN comatcopy_k_rn +#define COMATCOPY_K_CT comatcopy_k_ct +#define COMATCOPY_K_RT comatcopy_k_rt +#define COMATCOPY_K_CNC comatcopy_k_cnc +#define COMATCOPY_K_RNC comatcopy_k_rnc +#define COMATCOPY_K_CTC comatcopy_k_ctc +#define COMATCOPY_K_RTC comatcopy_k_rtc + + #else #define CAMAX_K gotoblas -> camax_k @@ -380,6 +392,17 @@ #define CNEG_TCOPY gotoblas -> cneg_tcopy #define CLASWP_NCOPY gotoblas -> claswp_ncopy +#define CAXPBY_K gotoblas -> caxpby_k + +#define COMATCOPY_K_CN gotoblas -> comatcopy_k_cn +#define COMATCOPY_K_RN gotoblas -> comatcopy_k_rn +#define COMATCOPY_K_CT gotoblas -> comatcopy_k_ct +#define COMATCOPY_K_RT gotoblas -> comatcopy_k_rt +#define COMATCOPY_K_CNC gotoblas -> comatcopy_k_cnc +#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc +#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc +#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc + #endif #define CGEMM_NN cgemm_nn diff --git a/common_d.h b/common_d.h index 4c9a53f6c..c34e1f28f 100644 --- a/common_d.h +++ b/common_d.h @@ -144,6 +144,12 @@ #define DNEG_TCOPY dneg_tcopy #define DLASWP_NCOPY dlaswp_ncopy +#define DAXPBY_K daxpby_k +#define DOMATCOPY_K_CN domatcopy_k_cn +#define DOMATCOPY_K_RN domatcopy_k_rn +#define DOMATCOPY_K_CT domatcopy_k_ct +#define DOMATCOPY_K_RT domatcopy_k_rt + #else #define DAMAX_K gotoblas -> damax_k @@ -255,6 +261,12 @@ #define DNEG_TCOPY gotoblas -> dneg_tcopy #define DLASWP_NCOPY gotoblas -> dlaswp_ncopy +#define DAXPBY_K gotoblas -> daxpby_k +#define DOMATCOPY_K_CN gotoblas -> domatcopy_k_cn +#define DOMATCOPY_K_RN gotoblas -> domatcopy_k_rn +#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct +#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt + #endif #define DGEMM_NN dgemm_nn diff --git a/common_interface.h b/common_interface.h index 14c2cf7a4..2cc1619ff 
100644 --- a/common_interface.h +++ b/common_interface.h @@ -757,6 +757,23 @@ FLOATRET BLASFUNC(slamc3)(float *, float *); double BLASFUNC(dlamc3)(double *, double *); xdouble BLASFUNC(qlamc3)(xdouble *, xdouble *); +/* BLAS extensions */ + +void BLASFUNC(saxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *); +void BLASFUNC(daxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *); +void BLASFUNC(caxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *); +void BLASFUNC(zaxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *); + +void BLASFUNC(somatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *); +void BLASFUNC(domatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *); +void BLASFUNC(comatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *); +void BLASFUNC(zomatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *); + +void BLASFUNC(simatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *); +void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *); +void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *); +void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *); + #ifdef __cplusplus } diff --git a/common_level1.h b/common_level1.h index f51ced668..a45eec134 100644 --- a/common_level1.h +++ b/common_level1.h @@ -204,6 +204,13 @@ int srotm_k (BLASLONG, float, BLASLONG, float, BLASLONG, float); int drotm_k (BLASLONG, double, BLASLONG, double, BLASLONG, double); int qrotm_k (BLASLONG, xdouble, BLASLONG, xdouble, BLASLONG, xdouble); + +int saxpby_k (BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); +int daxpby_k 
(BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); +int caxpby_k (BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG); +int zaxpby_k (BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG); + + #ifdef __CUDACC__ } #endif diff --git a/common_level3.h b/common_level3.h index cbc67a6c3..7e1756e67 100644 --- a/common_level3.h +++ b/common_level3.h @@ -1732,6 +1732,37 @@ int zgemc_otcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b, BLA int xgemc_oncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b, BLASLONG ldb, xdouble *c); int xgemc_otcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b, BLASLONG ldb, xdouble *c); +int somatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG); +int somatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG); +int somatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG); +int somatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG); + +int domatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); +int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); +int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); +int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); + +int comatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); +int comatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); +int comatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); +int comatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); + +int comatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); +int comatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); 
+int comatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); +int comatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); + +int zomatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); +int zomatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); +int zomatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); +int zomatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); + +int zomatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); +int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); +int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); +int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); + + #ifdef __CUDACC__ } #endif diff --git a/common_macro.h b/common_macro.h index 0c34ecb01..f9de3773a 100644 --- a/common_macro.h +++ b/common_macro.h @@ -628,6 +628,13 @@ #define HERK_THREAD_LR DSYRK_THREAD_LN #define HERK_THREAD_LC DSYRK_THREAD_LT +#define AXPBY_K DAXPBY_K + +#define OMATCOPY_K_CN DOMATCOPY_K_CN +#define OMATCOPY_K_RN DOMATCOPY_K_RN +#define OMATCOPY_K_CT DOMATCOPY_K_CT +#define OMATCOPY_K_RT DOMATCOPY_K_RT + #else #define AMAX_K SAMAX_K @@ -918,6 +925,13 @@ #define HERK_THREAD_LR SSYRK_THREAD_LN #define HERK_THREAD_LC SSYRK_THREAD_LT +#define AXPBY_K SAXPBY_K + +#define OMATCOPY_K_CN SOMATCOPY_K_CN +#define OMATCOPY_K_RN SOMATCOPY_K_RN +#define OMATCOPY_K_CT SOMATCOPY_K_CT +#define OMATCOPY_K_RT SOMATCOPY_K_RT + #endif #else #ifdef XDOUBLE @@ -1722,6 +1736,17 @@ #define SYMM_OUTCOPY ZSYMM_OUTCOPY #define SYMM_OLTCOPY ZSYMM_OLTCOPY +#define AXPBY_K ZAXPBY_K + +#define OMATCOPY_K_CN ZOMATCOPY_K_CN +#define OMATCOPY_K_RN ZOMATCOPY_K_RN +#define OMATCOPY_K_CT ZOMATCOPY_K_CT +#define 
OMATCOPY_K_RT ZOMATCOPY_K_RT +#define OMATCOPY_K_CNC ZOMATCOPY_K_CNC +#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC +#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC +#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC + #else #define AMAX_K CAMAX_K @@ -2123,6 +2148,17 @@ #define SYMM_OUTCOPY CSYMM_OUTCOPY #define SYMM_OLTCOPY CSYMM_OLTCOPY +#define AXPBY_K CAXPBY_K + +#define OMATCOPY_K_CN COMATCOPY_K_CN +#define OMATCOPY_K_RN COMATCOPY_K_RN +#define OMATCOPY_K_CT COMATCOPY_K_CT +#define OMATCOPY_K_RT COMATCOPY_K_RT +#define OMATCOPY_K_CNC COMATCOPY_K_CNC +#define OMATCOPY_K_RNC COMATCOPY_K_RNC +#define OMATCOPY_K_CTC COMATCOPY_K_CTC +#define OMATCOPY_K_RTC COMATCOPY_K_RTC + #endif #endif diff --git a/common_param.h b/common_param.h index e978193d4..14dbc7e7d 100644 --- a/common_param.h +++ b/common_param.h @@ -806,10 +806,47 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); #endif + void (*init)(void); int snum_opt, dnum_opt, qnum_opt; + int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG); + int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG); + int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG); + int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG); + + int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); + int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); + int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); + int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); + + int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); + int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); + int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); + int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, 
BLASLONG); + + int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); + int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); + int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); + int (*comatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); + + int (*comatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); + int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); + int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); + int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); + + int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); + int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); + int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); + int (*zomatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); + + int (*zomatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); + int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); + int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); + int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); + + } gotoblas_t; extern gotoblas_t *gotoblas; diff --git a/common_s.h b/common_s.h index 267c5f381..4e9b6dbe7 100644 --- a/common_s.h +++ b/common_s.h @@ -146,6 +146,14 @@ #define SNEG_TCOPY sneg_tcopy #define SLASWP_NCOPY slaswp_ncopy +#define SAXPBY_K saxpby_k + +#define SOMATCOPY_K_CN somatcopy_k_cn +#define SOMATCOPY_K_RN somatcopy_k_rn +#define SOMATCOPY_K_CT somatcopy_k_ct +#define 
SOMATCOPY_K_RT somatcopy_k_rt + + #else #define SAMAX_K gotoblas -> samax_k @@ -259,6 +267,14 @@ #define SNEG_TCOPY gotoblas -> sneg_tcopy #define SLASWP_NCOPY gotoblas -> slaswp_ncopy +#define SAXPBY_K gotoblas -> saxpby_k + +#define SOMATCOPY_K_CN gotoblas -> somatcopy_k_cn +#define SOMATCOPY_K_RN gotoblas -> somatcopy_k_rn +#define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct +#define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt + + #endif #define SGEMM_NN sgemm_nn diff --git a/common_z.h b/common_z.h index 8832caccb..133dea80c 100644 --- a/common_z.h +++ b/common_z.h @@ -209,6 +209,18 @@ #define ZNEG_TCOPY zneg_tcopy #define ZLASWP_NCOPY zlaswp_ncopy +#define ZAXPBY_K zaxpby_k + +#define ZOMATCOPY_K_CN zomatcopy_k_cn +#define ZOMATCOPY_K_RN zomatcopy_k_rn +#define ZOMATCOPY_K_CT zomatcopy_k_ct +#define ZOMATCOPY_K_RT zomatcopy_k_rt +#define ZOMATCOPY_K_CNC zomatcopy_k_cnc +#define ZOMATCOPY_K_RNC zomatcopy_k_rnc +#define ZOMATCOPY_K_CTC zomatcopy_k_ctc +#define ZOMATCOPY_K_RTC zomatcopy_k_rtc + + #else #define ZAMAX_K gotoblas -> zamax_k @@ -380,6 +392,17 @@ #define ZNEG_TCOPY gotoblas -> zneg_tcopy #define ZLASWP_NCOPY gotoblas -> zlaswp_ncopy +#define ZAXPBY_K gotoblas -> zaxpby_k + +#define ZOMATCOPY_K_CN gotoblas -> zomatcopy_k_cn +#define ZOMATCOPY_K_RN gotoblas -> zomatcopy_k_rn +#define ZOMATCOPY_K_CT gotoblas -> zomatcopy_k_ct +#define ZOMATCOPY_K_RT gotoblas -> zomatcopy_k_rt +#define ZOMATCOPY_K_CNC gotoblas -> zomatcopy_k_cnc +#define ZOMATCOPY_K_RNC gotoblas -> zomatcopy_k_rnc +#define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc +#define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc + #endif #define ZGEMM_NN zgemm_nn diff --git a/exports/gensymbol b/exports/gensymbol index 58a309f9e..45566352f 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -22,7 +22,9 @@ zhbmv,zhemm,zhemv,zher2,zher2k,zher,zherk,zhpmv,zhpr2, zhpr,zrotg,zscal,zswap,zsymm,zsyr2k,zsyrk,ztbmv, ztbsv,ztpmv,ztpsv,ztrmm,ztrmv,ztrsm,ztrsv, zsymv, - xerbla); + xerbla, + 
saxpby,daxpby,caxpby,zaxpby + ); @cblasobjs = ( cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv, @@ -49,7 +51,9 @@ cblas_zhemv, cblas_zher2, cblas_zher2k, cblas_zher, cblas_zherk, cblas_zhpmv, cblas_zhpr2, cblas_zhpr, cblas_zscal, cblas_zswap, cblas_zsymm, cblas_zsyr2k, cblas_zsyrk, cblas_ztbmv, cblas_ztbsv, cblas_ztpmv, cblas_ztpsv, cblas_ztrmm, cblas_ztrmv, cblas_ztrsm, - cblas_ztrsv, cblas_cdotc_sub, cblas_cdotu_sub, cblas_zdotc_sub, cblas_zdotu_sub ); + cblas_ztrsv, cblas_cdotc_sub, cblas_cdotu_sub, cblas_zdotc_sub, cblas_zdotu_sub, + cblas_saxpby,cblas_daxpby,cblas_caxpby,cblas_zaxpby + ); @exblasobjs = ( qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm, diff --git a/interface/Makefile b/interface/Makefile index 9774f37b2..51f9937b8 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -27,6 +27,7 @@ SBLAS1OBJS = \ smax.$(SUFFIX) samax.$(SUFFIX) ismax.$(SUFFIX) isamax.$(SUFFIX) \ smin.$(SUFFIX) samin.$(SUFFIX) ismin.$(SUFFIX) isamin.$(SUFFIX) \ srot.$(SUFFIX) srotg.$(SUFFIX) srotm.$(SUFFIX) srotmg.$(SUFFIX) \ + saxpby.$(SUFFIX) SBLAS2OBJS = \ sgemv.$(SUFFIX) sger.$(SUFFIX) \ @@ -39,16 +40,19 @@ SBLAS2OBJS = \ SBLAS3OBJS = \ sgemm.$(SUFFIX) ssymm.$(SUFFIX) strmm.$(SUFFIX) \ - strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) + strsm.$(SUFFIX) ssyrk.$(SUFFIX) ssyr2k.$(SUFFIX) \ + somatcopy.$(SUFFIX) simatcopy.$(SUFFIX) + DBLAS1OBJS = \ daxpy.$(SUFFIX) dswap.$(SUFFIX) \ dcopy.$(SUFFIX) dscal.$(SUFFIX) \ - ddot.$(SUFFIX) \ + ddot.$(SUFFIX) \ dasum.$(SUFFIX) dnrm2.$(SUFFIX) \ dmax.$(SUFFIX) damax.$(SUFFIX) idmax.$(SUFFIX) idamax.$(SUFFIX) \ dmin.$(SUFFIX) damin.$(SUFFIX) idmin.$(SUFFIX) idamin.$(SUFFIX) \ drot.$(SUFFIX) drotg.$(SUFFIX) drotm.$(SUFFIX) drotmg.$(SUFFIX) \ + daxpby.$(SUFFIX) DBLAS2OBJS = \ dgemv.$(SUFFIX) dger.$(SUFFIX) \ @@ -61,7 +65,8 @@ DBLAS2OBJS = \ DBLAS3OBJS = \ dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \ - dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) + dtrsm.$(SUFFIX) 
dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \ + domatcopy.$(SUFFIX) dimatcopy.$(SUFFIX) CBLAS1OBJS = \ caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \ @@ -71,6 +76,7 @@ CBLAS1OBJS = \ scamax.$(SUFFIX) icamax.$(SUFFIX) \ scamin.$(SUFFIX) icamin.$(SUFFIX) \ csrot.$(SUFFIX) crotg.$(SUFFIX) \ + caxpby.$(SUFFIX) CBLAS2OBJS = \ cgemv.$(SUFFIX) cgeru.$(SUFFIX) cgerc.$(SUFFIX) \ @@ -87,7 +93,8 @@ CBLAS2OBJS = \ CBLAS3OBJS = \ cgemm.$(SUFFIX) csymm.$(SUFFIX) ctrmm.$(SUFFIX) \ ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \ - chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) + chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \ + comatcopy.$(SUFFIX) cimatcopy.$(SUFFIX) ZBLAS1OBJS = \ zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \ @@ -97,6 +104,7 @@ ZBLAS1OBJS = \ dzamax.$(SUFFIX) izamax.$(SUFFIX) \ dzamin.$(SUFFIX) izamin.$(SUFFIX) \ zdrot.$(SUFFIX) zrotg.$(SUFFIX) \ + zaxpby.$(SUFFIX) ZBLAS2OBJS = \ zgemv.$(SUFFIX) zgeru.$(SUFFIX) zgerc.$(SUFFIX) \ @@ -113,7 +121,8 @@ ZBLAS2OBJS = \ ZBLAS3OBJS = \ zgemm.$(SUFFIX) zsymm.$(SUFFIX) ztrmm.$(SUFFIX) \ ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \ - zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) + zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \ + zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX) ifdef SUPPORT_GEMM3M @@ -246,7 +255,7 @@ CSBLAS1OBJS = \ cblas_isamax.$(SUFFIX) cblas_sasum.$(SUFFIX) cblas_saxpy.$(SUFFIX) \ cblas_scopy.$(SUFFIX) cblas_sdot.$(SUFFIX) cblas_sdsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) \ cblas_srot.$(SUFFIX) cblas_srotg.$(SUFFIX) cblas_srotm.$(SUFFIX) cblas_srotmg.$(SUFFIX) \ - cblas_sscal.$(SUFFIX) cblas_sswap.$(SUFFIX) cblas_snrm2.$(SUFFIX) + cblas_sscal.$(SUFFIX) cblas_sswap.$(SUFFIX) cblas_snrm2.$(SUFFIX) cblas_saxpby.$(SUFFIX) CSBLAS2OBJS = \ cblas_sgemv.$(SUFFIX) cblas_sger.$(SUFFIX) cblas_ssymv.$(SUFFIX) cblas_strmv.$(SUFFIX) \ @@ -262,7 +271,7 @@ CDBLAS1OBJS = \ cblas_idamax.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \ cblas_dcopy.$(SUFFIX) cblas_ddot.$(SUFFIX) \ cblas_drot.$(SUFFIX) 
cblas_drotg.$(SUFFIX) cblas_drotm.$(SUFFIX) cblas_drotmg.$(SUFFIX) \ - cblas_dscal.$(SUFFIX) cblas_dswap.$(SUFFIX) cblas_dnrm2.$(SUFFIX) + cblas_dscal.$(SUFFIX) cblas_dswap.$(SUFFIX) cblas_dnrm2.$(SUFFIX) cblas_daxpby.$(SUFFIX) CDBLAS2OBJS = \ cblas_dgemv.$(SUFFIX) cblas_dger.$(SUFFIX) cblas_dsymv.$(SUFFIX) cblas_dtrmv.$(SUFFIX) \ @@ -280,7 +289,8 @@ CCBLAS1OBJS = \ cblas_cdotc.$(SUFFIX) cblas_cdotu.$(SUFFIX) \ cblas_cdotc_sub.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) \ cblas_cscal.$(SUFFIX) cblas_csscal.$(SUFFIX) \ - cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX) + cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX) \ + cblas_caxpby.$(SUFFIX) CCBLAS2OBJS = \ cblas_cgemv.$(SUFFIX) cblas_cgerc.$(SUFFIX) cblas_cgeru.$(SUFFIX) \ @@ -301,7 +311,8 @@ CZBLAS1OBJS = \ cblas_zdotc.$(SUFFIX) cblas_zdotu.$(SUFFIX) \ cblas_zdotc_sub.$(SUFFIX) cblas_zdotu_sub.$(SUFFIX) \ cblas_zscal.$(SUFFIX) cblas_zdscal.$(SUFFIX) \ - cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX) + cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX) \ + cblas_zaxpby.$(SUFFIX) CZBLAS2OBJS = \ cblas_zgemv.$(SUFFIX) cblas_zgerc.$(SUFFIX) cblas_zgeru.$(SUFFIX) \ @@ -1991,3 +2002,55 @@ zlarf.$(SUFFIX) zlarf.$(PSUFFIX) : larf.c xlarf.$(SUFFIX) xlarf.$(PSUFFIX) : larf.c $(CC) -c $(CFLAGS) $< -o $(@F) + +############# BLAS EXTENSIONS ##################################### + +daxpby.$(SUFFIX) daxpby.$(PSUFFIX) : axpby.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cblas_daxpby.$(SUFFIX) cblas_daxpby.$(PSUFFIX) : axpby.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +saxpby.$(SUFFIX) saxpby.$(PSUFFIX) : axpby.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cblas_saxpby.$(SUFFIX) cblas_saxpby.$(PSUFFIX) : axpby.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +zaxpby.$(SUFFIX) zaxpby.$(PSUFFIX) : zaxpby.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cblas_zaxpby.$(SUFFIX) cblas_zaxpby.$(PSUFFIX) : zaxpby.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +caxpby.$(SUFFIX) caxpby.$(PSUFFIX) : zaxpby.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +cblas_caxpby.$(SUFFIX) cblas_caxpby.$(PSUFFIX) : 
zaxpby.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +domatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : omatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +somatcopy.$(SUFFIX) somatcopy.$(PSUFFIX) : omatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +comatcopy.$(SUFFIX) comatcopy.$(PSUFFIX) : zomatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zomatcopy.$(SUFFIX) zomatcopy.$(PSUFFIX) : zomatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +dimatcopy.$(SUFFIX) dimatcopy.$(PSUFFIX) : imatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +simatcopy.$(SUFFIX) simatcopy.$(PSUFFIX) : imatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +cimatcopy.$(SUFFIX) cimatcopy.$(PSUFFIX) : zimatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + +zimatcopy.$(SUFFIX) zimatcopy.$(PSUFFIX) : zimatcopy.c + $(CC) -c $(CFLAGS) $< -o $(@F) + + diff --git a/interface/axpby.c b/interface/axpby.c new file mode 100644 index 000000000..7e6fcf4c1 --- /dev/null +++ b/interface/axpby.c @@ -0,0 +1,72 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +/****************************************************************** + 2014/06/07 Saar +******************************************************************/ + + +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#ifndef CBLAS + +void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY) +{ + + BLASLONG n = *N; + BLASLONG incx = *INCX; + BLASLONG incy = *INCY; + FLOAT alpha = *ALPHA; + FLOAT beta = *BETA; + +#else + +void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT beta, FLOAT *y, blasint incy) +{ + +#endif + + if (n <= 0) return; + + FUNCTION_PROFILE_START(); + + if (incx < 0) x -= (n - 1) * incx; + if (incy < 0) y -= (n - 1) * incy; + + AXPBY_K(n, alpha, x, incx, beta, y, incy); + + FUNCTION_PROFILE_END(1, 2 * n, 2 * n); + + return; + +} + + diff --git a/interface/imatcopy.c b/interface/imatcopy.c new file mode 100644 index 000000000..4a86d83cd --- /dev/null +++ b/interface/imatcopy.c @@ -0,0 +1,142 @@ +/*************************************************************************** +Copyright (c) 2014, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. 
Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/*********************************************************** + * 2014/06/10 Saar +***********************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#if defined(DOUBLE) +#define ERROR_NAME "DIMATCOPY" +#else +#define ERROR_NAME "SIMATCOPY" +#endif + +#define BlasRowMajor 0 +#define BlasColMajor 1 +#define BlasNoTrans 0 +#define BlasTrans 1 + +#undef malloc +#undef free + +void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb) +{ + + char Order, Trans; + int order=-1,trans=-1; + blasint info = -1; + FLOAT *b; + size_t msize; + + Order = *ORDER; + Trans = *TRANS; + + TOUPPER(Order); + TOUPPER(Trans); + + if ( Order == 'C' ) order = BlasColMajor; + if ( Order == 'R' ) order = BlasRowMajor; + if ( Trans == 'N' ) trans = BlasNoTrans; + if ( Trans == 'R' ) trans = BlasNoTrans; + if ( Trans == 'T' ) trans = BlasTrans; + if ( Trans == 'C' ) trans = BlasTrans; + + if ( order == BlasColMajor) + { + if ( trans == BlasNoTrans && *ldb < *rows ) info = 9; + if ( trans == BlasTrans && *ldb < *cols ) info = 9; + } + if ( order == BlasRowMajor) + { + if ( trans == BlasNoTrans && *ldb < *cols ) info = 9; + if ( trans == BlasTrans && *ldb < *rows ) info = 9; + } + + if ( order == BlasColMajor && *lda < *rows ) info = 7; + if ( order == BlasRowMajor && *lda < *cols ) info = 7; + if ( *cols <= 0 ) info = 4; + if ( *rows <= 0 ) info = 3; + if ( trans < 0 ) info = 2; + if ( order < 0 ) info = 1; + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + + if ( *lda > *ldb ) + msize = (*lda) * (*ldb) * sizeof(FLOAT); + else + msize = (*ldb) * (*ldb) * sizeof(FLOAT); + + b = malloc(msize); + if ( b == NULL ) + { + printf("Memory alloc failed\n"); + exit(1); + } + + if ( order == BlasColMajor ) + { + if ( trans == 
BlasNoTrans ) + { + OMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda, b, *ldb ); + OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0 , b, *ldb, a, *ldb ); + } + else + { + OMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda, b, *ldb ); + OMATCOPY_K_CN(*rows, *cols, (FLOAT) 1.0, b, *ldb, a, *ldb ); + } + } + else + { + if ( trans == BlasNoTrans ) + { + OMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda, b, *ldb ); + OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, b, *ldb, a, *ldb ); + } + else + { + OMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda, b, *ldb ); + OMATCOPY_K_RN(*rows, *cols, (FLOAT) 1.0, b, *ldb, a, *ldb ); + } + } + + free(b); + return; + +} + + diff --git a/interface/omatcopy.c b/interface/omatcopy.c new file mode 100644 index 000000000..1727e04ef --- /dev/null +++ b/interface/omatcopy.c @@ -0,0 +1,120 @@ +/*************************************************************************** +Copyright (c) 2014, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/***********************************************************
+ * 2014/06/09 Saar
+***********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#if defined(DOUBLE)
+#define ERROR_NAME "DOMATCOPY"
+#else
+#define ERROR_NAME "SOMATCOPY"
+#endif
+
+#define BlasRowMajor 0
+#define BlasColMajor 1
+#define BlasNoTrans  0
+#define BlasTrans    1
+
+/*
+ * Fortran-callable front end (every argument is passed by pointer) that
+ * validates its arguments and forwards to one of the four OMATCOPY_K_*
+ * kernels.  The copy itself is done entirely by the selected kernel.
+ *
+ *   ORDER : 'C' (column major) or 'R' (row major), case-insensitive
+ *   TRANS : 'N' or 'R' select the non-transposing kernel,
+ *           'T' or 'C' select the transposing kernel
+ *   rows, cols : dimensions handed to the kernel, both must be > 0
+ *   alpha : scalar handed to the kernel by value
+ *   a/lda : source matrix and its leading dimension
+ *   b/ldb : destination matrix and its leading dimension
+ *
+ * On a bad argument xerbla is called with the 1-based position of the
+ * lowest-numbered offending argument and nothing is copied.
+ */
+void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb)
+{
+
+	char order_ch = *ORDER;
+	char trans_ch = *TRANS;
+	int order = -1;
+	int trans = -1;
+	blasint info = -1;
+	blasint need_lda, need_ldb;
+
+	TOUPPER(order_ch);
+	TOUPPER(trans_ch);
+
+	switch ( order_ch )
+	{
+		case 'C': order = BlasColMajor; break;
+		case 'R': order = BlasRowMajor; break;
+		default : break;
+	}
+
+	switch ( trans_ch )
+	{
+		case 'N':
+		case 'R': trans = BlasNoTrans; break;
+		case 'T':
+		case 'C': trans = BlasTrans; break;
+		default : break;
+	}
+
+	/* Smallest legal leading dimensions.  The source is rows x cols; the
+	 * destination has the same shape unless the copy transposes, and the
+	 * leading dimension follows the storage order. */
+	need_lda = ( order == BlasColMajor ) ? *rows : *cols;
+	need_ldb = ( ( order == BlasColMajor ) == ( trans == BlasNoTrans ) ) ? *rows : *cols;
+
+	/* The lowest-numbered bad argument is the one reported. */
+	if      ( order < 0 )        info = 1;
+	else if ( trans < 0 )        info = 2;
+	else if ( *rows <= 0 )       info = 3;
+	else if ( *cols <= 0 )       info = 4;
+	else if ( *lda < need_lda )  info = 7;
+	else if ( *ldb < need_ldb )  info = 9;
+
+	if (info >= 0) {
+		BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+		return;
+	}
+
+	if ( order == BlasColMajor )
+	{
+		if ( trans == BlasNoTrans )
+		{
+			OMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda, b, *ldb );
+		}
+		else
+		{
+			OMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda, b, *ldb );
+		}
+		return;
+	}
+
+	/* Row major. */
+	if ( trans == BlasNoTrans )
+	{
+		OMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda, b, *ldb );
+	}
+	else
+	{
+		OMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda, b, *ldb );
+	}
+
+	return;
+
+}
+
+
diff --git a/interface/scal.c b/interface/scal.c
index 7b72ca01c..c3e03c742 100644
--- a/interface/scal.c
+++ b/interface/scal.c
@@ -78,6 +78,9 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){
 #ifdef SMP
   nthreads = num_cpu_avail(1);
 
+  if (n <= 1048576 )
+	nthreads = 1;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/interface/zaxpby.c b/interface/zaxpby.c
new file mode 100644
index 000000000..ff7510749
--- /dev/null
+++ b/interface/zaxpby.c
@@ -0,0 +1,74 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/**********************************************************************
+	2014/06/07 Saar
+
+**********************************************************************/
+
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+/*
+ * Complex AXPBY front end.  One body serves two entry points:
+ *   - without CBLAS defined: NAME, with n/incx/incy passed by pointer;
+ *   - with CBLAS defined:    CNAME, with n/incx/incy passed by value.
+ *
+ * ALPHA and BETA each point at two consecutive FLOATs (read below as real
+ * part, then imaginary part).  x and y are indexed in units of two FLOATs
+ * per element, as the "* 2" in the negative-stride adjustment shows.
+ * The arithmetic itself is done by AXPBY_K.
+ *
+ * Nothing is read or written when n <= 0.
+ */
+#ifndef CBLAS
+
+void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY)
+{
+
+  blasint n    = *N;
+  blasint incx = *INCX;
+  blasint incy = *INCY;
+
+#else
+
+void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *BETA, FLOAT *y, blasint incy)
+{
+
+#endif
+
+  /* Empty vector: return before touching ALPHA/BETA. */
+  if (n <= 0) return;
+
+  FLOAT alpha_r = ALPHA[0];
+  FLOAT alpha_i = ALPHA[1];
+  FLOAT beta_r  = BETA[0];
+  FLOAT beta_i  = BETA[1];
+
+  FUNCTION_PROFILE_START();
+
+  /* With a negative stride, move the base pointer to the other end of the
+     vector so the kernel can step by incx/incy starting at index 0. */
+  if (incx < 0) {
+    x -= (n - 1) * incx * 2;
+  }
+  if (incy < 0) {
+    y -= (n - 1) * incy * 2;
+  }
+
+  AXPBY_K (n, alpha_r, alpha_i, x, incx, beta_r, beta_i, y, incy);
+
+  FUNCTION_PROFILE_END(4, 2 * n, 2 * n);
+
+  return;
+
+}
diff --git a/interface/zimatcopy.c b/interface/zimatcopy.c
new file mode 100644
index 000000000..90402d3c4
--- /dev/null
+++ b/interface/zimatcopy.c
@@ -0,0 +1,185 @@
+/*************************************************************************** +Copyright (c) 2014, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+/***********************************************************
+ * 2014/06/10 Saar
+***********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#if defined(DOUBLE)
+#define ERROR_NAME "ZIMATCOPY"
+#else
+#define ERROR_NAME "CIMATCOPY"
+#endif
+
+#define BlasRowMajor 0
+#define BlasColMajor 1
+#define BlasNoTrans  0
+#define BlasTrans    1
+#define BlasTransConj 2
+#define BlasConj     3
+
+/*
+ * Fortran-callable in-place complex matrix copy (every argument is passed
+ * by pointer).  The work is done in two kernel calls: an OMATCOPY_K_*
+ * kernel writes the result into a temporary matrix with leading dimension
+ * ldb, and a plain copy kernel (alpha = 1 + 0i) writes that temporary back
+ * over a, again with leading dimension ldb.
+ *
+ *   ORDER : 'C' (column major) or 'R' (row major), case-insensitive
+ *   TRANS : 'N' none, 'T' transpose, 'C' conjugate transpose,
+ *           'R' conjugate only
+ *   rows, cols : dimensions of the input matrix, both must be > 0
+ *   alpha : two FLOATs, passed to the kernel as alpha[0], alpha[1]
+ *   a     : matrix, overwritten; each element is two FLOATs
+ *   lda   : leading dimension of a on entry
+ *   ldb   : leading dimension of a on exit
+ *
+ * NOTE(review): a must be large enough for the result laid out with ldb;
+ * that cannot be checked here and is the caller's responsibility.
+ *
+ * On a bad argument xerbla is called with the 1-based position of the
+ * lowest-numbered offending argument and a is left untouched.  If the
+ * temporary cannot be allocated the process is terminated.
+ */
+void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, blasint *ldb)
+{
+
+	char Order, Trans;
+	int order=-1,trans=-1;
+	int transposed;
+	blasint info = -1;
+	blasint out_rows, out_cols;
+	FLOAT *b;
+	size_t msize;
+
+	Order = *ORDER;
+	Trans = *TRANS;
+
+	TOUPPER(Order);
+	TOUPPER(Trans);
+
+	if ( Order == 'C' ) order = BlasColMajor;
+	if ( Order == 'R' ) order = BlasRowMajor;
+	if ( Trans == 'N' ) trans = BlasNoTrans;
+	if ( Trans == 'T' ) trans = BlasTrans;
+	if ( Trans == 'C' ) trans = BlasTransConj;
+	if ( Trans == 'R' ) trans = BlasConj;
+
+	/* Shape of the result: swapped when the operation transposes. */
+	transposed = ( trans == BlasTrans || trans == BlasTransConj );
+	out_rows   = transposed ? *cols : *rows;
+	out_cols   = transposed ? *rows : *cols;
+
+	/* Later assignments win, so the lowest-numbered bad argument is the
+	 * one reported.  ldb is the 8th argument of this routine. */
+	if ( order == BlasColMajor && *ldb < out_rows ) info = 8;
+	if ( order == BlasRowMajor && *ldb < out_cols ) info = 8;
+	if ( order == BlasColMajor && *lda < *rows ) info = 7;
+	if ( order == BlasRowMajor && *lda < *cols ) info = 7;
+	if ( *cols <= 0 ) info = 4;
+	if ( *rows <= 0 ) info = 3;
+	if ( trans < 0 )  info = 2;
+	if ( order < 0 )  info = 1;
+
+	if (info >= 0) {
+		BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+		return;
+	}
+
+	/* The temporary holds out_cols columns (column major) or out_rows rows
+	 * (row major), each ldb complex elements apart.  Sizing it from lda and
+	 * ldb alone, as before, was too small whenever the matrix had more
+	 * columns/rows than max(lda, ldb).  The arithmetic is done in size_t so
+	 * it cannot overflow blasint. */
+	if ( order == BlasColMajor )
+		msize = (size_t)(*ldb) * (size_t)out_cols * sizeof(FLOAT) * 2;
+	else
+		msize = (size_t)(*ldb) * (size_t)out_rows * sizeof(FLOAT) * 2;
+
+	b = malloc(msize);
+	if ( b == NULL )
+	{
+		printf("Memory alloc failed\n");
+		exit(1);
+	}
+
+	if ( order == BlasColMajor )
+	{
+		switch ( trans )
+		{
+			case BlasNoTrans:
+				OMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			case BlasConj:
+				OMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			case BlasTrans:
+				OMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			default: /* BlasTransConj */
+				OMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+		}
+
+		/* Copy back using the shape of the result, not of the input. */
+		OMATCOPY_K_CN(out_rows, out_cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
+	}
+	else
+	{
+		switch ( trans )
+		{
+			case BlasNoTrans:
+				OMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			case BlasConj:
+				OMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			case BlasTrans:
+				OMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			default: /* BlasTransConj */
+				OMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+		}
+
+		/* Copy back using the shape of the result, not of the input. */
+		OMATCOPY_K_RN(out_rows, out_cols, (FLOAT) 1.0, (FLOAT) 0.0 , b, *ldb, a, *ldb );
+	}
+
+	free(b);
+	return;
+
+}
+
+
diff --git a/interface/zomatcopy.c b/interface/zomatcopy.c
new file
mode 100644 index 000000000..819e57bab --- /dev/null +++ b/interface/zomatcopy.c @@ -0,0 +1,154 @@ +/*************************************************************************** +Copyright (c) 2014, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+/***********************************************************
+ * 2014/06/09 Saar
+***********************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#if defined(DOUBLE)
+#define ERROR_NAME "ZOMATCOPY"
+#else
+#define ERROR_NAME "COMATCOPY"
+#endif
+
+#define BlasRowMajor 0
+#define BlasColMajor 1
+#define BlasNoTrans  0
+#define BlasTrans    1
+#define BlasTransConj 2
+#define BlasConj     3
+
+/*
+ * Fortran-callable front end (every argument is passed by pointer) that
+ * validates its arguments and forwards to one of the eight complex
+ * OMATCOPY_K_* kernels.  The copy itself is done entirely by the kernel.
+ *
+ *   ORDER : 'C' (column major) or 'R' (row major), case-insensitive
+ *   TRANS : 'N' none, 'T' transpose, 'C' conjugate transpose,
+ *           'R' conjugate only
+ *   rows, cols : dimensions handed to the kernel, both must be > 0
+ *   alpha : two FLOATs, passed to the kernel as alpha[0], alpha[1]
+ *   a/lda : source matrix and its leading dimension
+ *   b/ldb : destination matrix and its leading dimension
+ *
+ * On a bad argument xerbla is called with the 1-based position of the
+ * lowest-numbered offending argument and nothing is copied.
+ */
+void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb)
+{
+
+	char order_ch = *ORDER;
+	char trans_ch = *TRANS;
+	int order = -1;
+	int trans = -1;
+	int swaps_dims;
+	blasint info = -1;
+	blasint need_lda, need_ldb;
+
+	TOUPPER(order_ch);
+	TOUPPER(trans_ch);
+
+	switch ( order_ch )
+	{
+		case 'C': order = BlasColMajor; break;
+		case 'R': order = BlasRowMajor; break;
+		default : break;
+	}
+
+	switch ( trans_ch )
+	{
+		case 'N': trans = BlasNoTrans;   break;
+		case 'T': trans = BlasTrans;     break;
+		case 'C': trans = BlasTransConj; break;
+		case 'R': trans = BlasConj;      break;
+		default : break;
+	}
+
+	/* Smallest legal leading dimensions.  The destination has the shape of
+	 * the source unless the operation transposes, and the leading dimension
+	 * follows the storage order. */
+	swaps_dims = ( trans == BlasTrans || trans == BlasTransConj );
+	need_lda   = ( order == BlasColMajor ) ? *rows : *cols;
+	need_ldb   = ( ( order == BlasColMajor ) != swaps_dims ) ? *rows : *cols;
+
+	/* The lowest-numbered bad argument is the one reported. */
+	if      ( order < 0 )        info = 1;
+	else if ( trans < 0 )        info = 2;
+	else if ( *rows <= 0 )       info = 3;
+	else if ( *cols <= 0 )       info = 4;
+	else if ( *lda < need_lda )  info = 7;
+	else if ( *ldb < need_ldb )  info = 9;
+
+	if (info >= 0) {
+		BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+		return;
+	}
+
+	if ( order == BlasColMajor )
+	{
+		switch ( trans )
+		{
+			case BlasNoTrans:
+				OMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			case BlasConj:
+				OMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			case BlasTrans:
+				OMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			case BlasTransConj:
+				OMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			default:
+				break;
+		}
+	}
+	else
+	{
+		switch ( trans )
+		{
+			case BlasNoTrans:
+				OMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			case BlasConj:
+				OMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			case BlasTrans:
+				OMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			case BlasTransConj:
+				OMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+				break;
+			default:
+				break;
+		}
+	}
+
+	return;
+
+}
+
+
diff --git a/interface/zscal.c b/interface/zscal.c
index ad99874dc..1e6fdecdb 100644
--- a/interface/zscal.c
+++ b/interface/zscal.c
@@ -90,6 +90,9 @@ void CNAME(blasint n, FLOAT alpha_r, FLOAT *x, blasint incx){
 #ifdef SMP
   nthreads = num_cpu_avail(1);
 
+  if ( n <= 1048576 )
+	nthreads = 1;
+
   if (nthreads == 1) {
 #endif
 
diff --git a/kernel/Makefile.L1 b/kernel/Makefile.L1
index 667145c4e..1153443c2 100644
--- a/kernel/Makefile.L1
+++ b/kernel/Makefile.L1
@@ -432,18 +432,38 @@ ifndef LSAME_KERNEL
 LSAME_KERNEL = lsame.S
 endif
 
+### AXPBY ###
+
+ifndef SAXPBYKERNEL
+SAXPBYKERNEL = ../arm/axpby.c
+endif
+
+ifndef DAXPBYKERNEL
+DAXPBYKERNEL = ../arm/axpby.c
+endif
+
+ifndef CAXPBYKERNEL
+CAXPBYKERNEL = ../arm/zaxpby.c
+endif
+
+ifndef ZAXPBYKERNEL
+ZAXPBYKERNEL = ../arm/zaxpby.c
+endif
+
 SBLASOBJS    += \
 	samax_k$(TSUFFIX).$(SUFFIX) samin_k$(TSUFFIX).$(SUFFIX) smax_k$(TSUFFIX).$(SUFFIX) smin_k$(TSUFFIX).$(SUFFIX) \
 	isamax_k$(TSUFFIX).$(SUFFIX) isamin_k$(TSUFFIX).$(SUFFIX) ismax_k$(TSUFFIX).$(SUFFIX)
ismin_k$(TSUFFIX).$(SUFFIX) \ sasum_k$(TSUFFIX).$(SUFFIX) saxpy_k$(TSUFFIX).$(SUFFIX) scopy_k$(TSUFFIX).$(SUFFIX) \ sdot_k$(TSUFFIX).$(SUFFIX) sdsdot_k$(TSUFFIX).$(SUFFIX) dsdot_k$(TSUFFIX).$(SUFFIX) \ - snrm2_k$(TSUFFIX).$(SUFFIX) srot_k$(TSUFFIX).$(SUFFIX) sscal_k$(TSUFFIX).$(SUFFIX) sswap_k$(TSUFFIX).$(SUFFIX) + snrm2_k$(TSUFFIX).$(SUFFIX) srot_k$(TSUFFIX).$(SUFFIX) sscal_k$(TSUFFIX).$(SUFFIX) sswap_k$(TSUFFIX).$(SUFFIX) \ + saxpby_k$(TSUFFIX).$(SUFFIX) DBLASOBJS += \ damax_k$(TSUFFIX).$(SUFFIX) damin_k$(TSUFFIX).$(SUFFIX) dmax_k$(TSUFFIX).$(SUFFIX) dmin_k$(TSUFFIX).$(SUFFIX) \ idamax_k$(TSUFFIX).$(SUFFIX) idamin_k$(TSUFFIX).$(SUFFIX) idmax_k$(TSUFFIX).$(SUFFIX) idmin_k$(TSUFFIX).$(SUFFIX) \ dasum_k$(TSUFFIX).$(SUFFIX) daxpy_k$(TSUFFIX).$(SUFFIX) dcopy_k$(TSUFFIX).$(SUFFIX) ddot_k$(TSUFFIX).$(SUFFIX) \ - dnrm2_k$(TSUFFIX).$(SUFFIX) drot_k$(TSUFFIX).$(SUFFIX) dscal_k$(TSUFFIX).$(SUFFIX) dswap_k$(TSUFFIX).$(SUFFIX) + dnrm2_k$(TSUFFIX).$(SUFFIX) drot_k$(TSUFFIX).$(SUFFIX) dscal_k$(TSUFFIX).$(SUFFIX) dswap_k$(TSUFFIX).$(SUFFIX) \ + daxpby_k$(TSUFFIX).$(SUFFIX) QBLASOBJS += \ qamax_k$(TSUFFIX).$(SUFFIX) qamin_k$(TSUFFIX).$(SUFFIX) qmax_k$(TSUFFIX).$(SUFFIX) qmin_k$(TSUFFIX).$(SUFFIX) \ @@ -455,13 +475,13 @@ CBLASOBJS += \ camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \ casum_k$(TSUFFIX).$(SUFFIX) caxpy_k$(TSUFFIX).$(SUFFIX) caxpyc_k$(TSUFFIX).$(SUFFIX) ccopy_k$(TSUFFIX).$(SUFFIX) \ cdotc_k$(TSUFFIX).$(SUFFIX) cdotu_k$(TSUFFIX).$(SUFFIX) cnrm2_k$(TSUFFIX).$(SUFFIX) csrot_k$(TSUFFIX).$(SUFFIX) \ - cscal_k$(TSUFFIX).$(SUFFIX) cswap_k$(TSUFFIX).$(SUFFIX) + cscal_k$(TSUFFIX).$(SUFFIX) cswap_k$(TSUFFIX).$(SUFFIX) caxpby_k$(TSUFFIX).$(SUFFIX) ZBLASOBJS += \ zamax_k$(TSUFFIX).$(SUFFIX) zamin_k$(TSUFFIX).$(SUFFIX) izamax_k$(TSUFFIX).$(SUFFIX) izamin_k$(TSUFFIX).$(SUFFIX) \ zasum_k$(TSUFFIX).$(SUFFIX) zaxpy_k$(TSUFFIX).$(SUFFIX) zaxpyc_k$(TSUFFIX).$(SUFFIX) zcopy_k$(TSUFFIX).$(SUFFIX) \ 
zdotc_k$(TSUFFIX).$(SUFFIX) zdotu_k$(TSUFFIX).$(SUFFIX) znrm2_k$(TSUFFIX).$(SUFFIX) zdrot_k$(TSUFFIX).$(SUFFIX) \ - zscal_k$(TSUFFIX).$(SUFFIX) zswap_k$(TSUFFIX).$(SUFFIX) + zscal_k$(TSUFFIX).$(SUFFIX) zswap_k$(TSUFFIX).$(SUFFIX) zaxpby_k$(TSUFFIX).$(SUFFIX) XBLASOBJS += \ xamax_k$(TSUFFIX).$(SUFFIX) xamin_k$(TSUFFIX).$(SUFFIX) ixamax_k$(TSUFFIX).$(SUFFIX) ixamin_k$(TSUFFIX).$(SUFFIX) \ @@ -765,3 +785,17 @@ $(KDIR)zswap_k$(TSUFFIX).$(SUFFIX) $(KDIR)zswap_k$(TPSUFFIX).$(PSUFFIX) : $(KE $(KDIR)xswap_k$(TSUFFIX).$(SUFFIX) $(KDIR)xswap_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSWAPKERNEL) $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE $< -o $@ + +$(KDIR)saxpby_k$(TSUFFIX).$(SUFFIX) $(KDIR)saxpby_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SAXPBYKERNEL) + $(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -UDOUBLE $< -o $@ + +$(KDIR)daxpby_k$(TSUFFIX).$(SUFFIX) $(KDIR)daxpby_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DAXPBYKERNEL) + $(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@ + +$(KDIR)caxpby_k$(TSUFFIX).$(SUFFIX) $(KDIR)caxpby_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CAXPBYKERNEL) + $(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UCONJ -UDOUBLE $< -o $@ + +$(KDIR)zaxpby_k$(TSUFFIX).$(SUFFIX) $(KDIR)zaxpby_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZAXPBYKERNEL) + $(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UCONJ -DDOUBLE $< -o $@ + + diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index b9b4bef1e..76857a2ba 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -320,6 +320,28 @@ XBLASOBJS += \ endif +###### BLAS extensions ##### +SBLASOBJS += \ + somatcopy_k_cn$(TSUFFIX).$(SUFFIX) somatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ + somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) + +DBLASOBJS += \ + domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ + domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) + +CBLASOBJS += \ + comatcopy_k_cn$(TSUFFIX).$(SUFFIX) comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ + comatcopy_k_ct$(TSUFFIX).$(SUFFIX) 
comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ + comatcopy_k_cnc$(TSUFFIX).$(SUFFIX) comatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ + comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) + +ZBLASOBJS += \ + zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ + zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ + zomatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ + zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) + + SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) SGEMMITCOPYOBJ_P = $(SGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) SGEMMONCOPYOBJ_P = $(SGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) @@ -3237,3 +3259,178 @@ $(KDIR)xtrsm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL $(KDIR)xtrsm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_N).c $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ + + +##### BLAS extensions ###### + +ifndef DOMATCOPY_CN +DOMATCOPY_CN = ../arm/omatcopy_cn.c +endif + +$(KDIR)domatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_CN) + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ + +ifndef DOMATCOPY_RN +DOMATCOPY_RN = ../arm/omatcopy_rn.c +endif + +$(KDIR)domatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_RN) + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DROWM $< -o $@ + +ifndef DOMATCOPY_CT +DOMATCOPY_CT = ../arm/omatcopy_ct.c +endif + +$(KDIR)domatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_CT) + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@ + +ifndef DOMATCOPY_RT +DOMATCOPY_RT = ../arm/omatcopy_rt.c +endif + +$(KDIR)domatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_RT) + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DROWM $< -o $@ + +ifndef SOMATCOPY_CN +SOMATCOPY_CN = ../arm/omatcopy_cn.c +endif + +$(KDIR)somatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_CN) + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o 
$@ + +ifndef SOMATCOPY_RN +SOMATCOPY_RN = ../arm/omatcopy_rn.c +endif + +$(KDIR)somatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_RN) + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DROWM $< -o $@ + +ifndef SOMATCOPY_CT +SOMATCOPY_CT = ../arm/omatcopy_ct.c +endif + +$(KDIR)somatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_CT) + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@ + +ifndef SOMATCOPY_RT +SOMATCOPY_RT = ../arm/omatcopy_rt.c +endif + +$(KDIR)somatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_RT) + $(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DROWM $< -o $@ + + +ifndef COMATCOPY_CN +COMATCOPY_CN = ../arm/zomatcopy_cn.c +endif + +$(KDIR)comatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CN) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ + +ifndef COMATCOPY_RN +COMATCOPY_RN = ../arm/zomatcopy_rn.c +endif + +$(KDIR)comatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RN) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ + +ifndef COMATCOPY_CT +COMATCOPY_CT = ../arm/zomatcopy_ct.c +endif + +$(KDIR)comatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CT) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ + +ifndef COMATCOPY_RT +COMATCOPY_RT = ../arm/zomatcopy_rt.c +endif + +$(KDIR)comatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RT) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ + +ifndef COMATCOPY_CNC +COMATCOPY_CNC = ../arm/zomatcopy_cnc.c +endif + +$(KDIR)comatcopy_k_cnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CNC) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ + +ifndef COMATCOPY_RNC +COMATCOPY_RNC = ../arm/zomatcopy_rnc.c +endif + +$(KDIR)comatcopy_k_rnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RNC) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ + +ifndef COMATCOPY_CTC +COMATCOPY_CTC = ../arm/zomatcopy_ctc.c +endif + +$(KDIR)comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) : 
$(KERNELDIR)/$(COMATCOPY_CTC) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ + +ifndef COMATCOPY_RTC +COMATCOPY_RTC = ../arm/zomatcopy_rtc.c +endif + +$(KDIR)comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RTC) + $(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ + + +ifndef ZOMATCOPY_CN +ZOMATCOPY_CN = ../arm/zomatcopy_cn.c +endif + +$(KDIR)zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CN) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ + +ifndef ZOMATCOPY_RN +ZOMATCOPY_RN = ../arm/zomatcopy_rn.c +endif + +$(KDIR)zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RN) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ + +ifndef ZOMATCOPY_CT +ZOMATCOPY_CT = ../arm/zomatcopy_ct.c +endif + +$(KDIR)zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CT) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@ + +ifndef ZOMATCOPY_RT +ZOMATCOPY_RT = ../arm/zomatcopy_rt.c +endif + +$(KDIR)zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RT) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@ + +ifndef ZOMATCOPY_CNC +ZOMATCOPY_CNC = ../arm/zomatcopy_cnc.c +endif + +$(KDIR)zomatcopy_k_cnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CNC) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ + +ifndef ZOMATCOPY_RNC +ZOMATCOPY_RNC = ../arm/zomatcopy_rnc.c +endif + +$(KDIR)zomatcopy_k_rnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RNC) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ + +ifndef ZOMATCOPY_CTC +ZOMATCOPY_CTC = ../arm/zomatcopy_ctc.c +endif + +$(KDIR)zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CTC) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@ + +ifndef ZOMATCOPY_RTC +ZOMATCOPY_RTC = ../arm/zomatcopy_rtc.c +endif + +$(KDIR)zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RTC) + $(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@ + + diff --git 
a/kernel/arm/axpby.c b/kernel/arm/axpby.c new file mode 100644 index 000000000..51cfe1f46 --- /dev/null +++ b/kernel/arm/axpby.c @@ -0,0 +1,96 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/
+
+
+#include "common.h"
+
+/*
+ * Reference (plain C) real AXPBY kernel:
+ *
+ *     y[i*inc_y] = alpha * x[i*inc_x] + beta * y[i*inc_y],   i = 0 .. n-1
+ *
+ * Zero scalars are special-cased so the corresponding vector is not read:
+ *   - beta == 0              : y is overwritten without being read;
+ *   - alpha == 0             : x is not read;
+ *   - alpha == 0 && beta == 0: y is filled with 0.0.
+ *
+ * Indexing starts at element 0 of both vectors and advances by inc_x and
+ * inc_y.  Always returns 0; does nothing when n <= 0.
+ */
+int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
+{
+	BLASLONG k;
+	BLASLONG xpos = 0;
+	BLASLONG ypos = 0;
+
+	if ( n < 0 ) return(0);
+
+	if ( beta == 0.0 && alpha == 0.0 )
+	{
+		/* y := 0 */
+		for ( k = 0; k < n; k++ )
+		{
+			y[ypos] = 0.0 ;
+			ypos += inc_y ;
+		}
+	}
+	else if ( beta == 0.0 )
+	{
+		/* y := alpha * x */
+		for ( k = 0; k < n; k++ )
+		{
+			y[ypos] = alpha * x[xpos] ;
+			xpos += inc_x ;
+			ypos += inc_y ;
+		}
+	}
+	else if ( alpha == 0.0 )
+	{
+		/* y := beta * y */
+		for ( k = 0; k < n; k++ )
+		{
+			y[ypos] = beta * y[ypos] ;
+			ypos += inc_y ;
+		}
+	}
+	else
+	{
+		/* General case. */
+		for ( k = 0; k < n; k++ )
+		{
+			y[ypos] = alpha * x[xpos] + beta * y[ypos] ;
+			xpos += inc_x ;
+			ypos += inc_y ;
+		}
+	}
+
+	return(0);
+
+}
+
+
diff --git a/kernel/arm/omatcopy_cn.c b/kernel/arm/omatcopy_cn.c
new file mode 100644
index 000000000..e46ddaede
--- /dev/null
+++ b/kernel/arm/omatcopy_cn.c
@@ -0,0 +1,90 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" + +/***************************************************** + * 2014/06/09 Saar + * + * Order ColMajor + * No Trans + * +******************************************************/ + +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) +{ + BLASLONG i,j; + FLOAT *aptr,*bptr; + + if ( rows <= 0 ) return(0); + if ( cols <= 0 ) return(0); + + aptr = a; + bptr = b; + + if ( alpha == 0.0 ) + { + for ( i=0; i