From 8c8f5962384e8a3ec54021ea52ab7d2cc24bbc1a Mon Sep 17 00:00:00 2001 From: wernsaar Date: Mon, 9 Jun 2014 17:11:07 +0200 Subject: [PATCH] Ref #51: added blas extension domatcopy as not optimized reference --- common_d.h | 9 +++ common_interface.h | 4 ++ common_level3.h | 5 ++ common_macro.h | 4 ++ common_param.h | 6 ++ interface/Makefile | 6 +- interface/omatcopy.c | 120 +++++++++++++++++++++++++++++++++++++++ kernel/Makefile.L3 | 44 ++++++++++++++ kernel/arm/omatcopy_cn.c | 90 +++++++++++++++++++++++++++++ kernel/arm/omatcopy_ct.c | 89 +++++++++++++++++++++++++++++ kernel/arm/omatcopy_rn.c | 90 +++++++++++++++++++++++++++++ kernel/arm/omatcopy_rt.c | 62 ++++++++++++++++++++ kernel/setparam-ref.c | 6 +- 13 files changed, 529 insertions(+), 6 deletions(-) create mode 100644 interface/omatcopy.c create mode 100644 kernel/arm/omatcopy_cn.c create mode 100644 kernel/arm/omatcopy_ct.c create mode 100644 kernel/arm/omatcopy_rn.c create mode 100644 kernel/arm/omatcopy_rt.c diff --git a/common_d.h b/common_d.h index e6e765a6f..c34e1f28f 100644 --- a/common_d.h +++ b/common_d.h @@ -145,6 +145,10 @@ #define DLASWP_NCOPY dlaswp_ncopy #define DAXPBY_K daxpby_k +#define DOMATCOPY_K_CN domatcopy_k_cn +#define DOMATCOPY_K_RN domatcopy_k_rn +#define DOMATCOPY_K_CT domatcopy_k_ct +#define DOMATCOPY_K_RT domatcopy_k_rt #else @@ -258,6 +262,11 @@ #define DLASWP_NCOPY gotoblas -> dlaswp_ncopy #define DAXPBY_K gotoblas -> daxpby_k +#define DOMATCOPY_K_CN gotoblas -> domatcopy_k_cn +#define DOMATCOPY_K_RN gotoblas -> domatcopy_k_rn +#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct +#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt + #endif #define DGEMM_NN dgemm_nn diff --git a/common_interface.h b/common_interface.h index 0112620d0..0311e6776 100644 --- a/common_interface.h +++ b/common_interface.h @@ -764,6 +764,10 @@ void BLASFUNC(daxpby) (blasint *, double *, double *, blasint *, double *, void BLASFUNC(caxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *); 
void BLASFUNC(zaxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *); +void BLASFUNC(somatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *); +void BLASFUNC(domatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *); +void BLASFUNC(comatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *); +void BLASFUNC(zomatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *); #ifdef __cplusplus } diff --git a/common_level3.h b/common_level3.h index cbc67a6c3..8a33c20ba 100644 --- a/common_level3.h +++ b/common_level3.h @@ -1732,6 +1732,11 @@ int zgemc_otcopy(BLASLONG m, BLASLONG n, double *a, BLASLONG lda, double *b, BLA int xgemc_oncopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b, BLASLONG ldb, xdouble *c); int xgemc_otcopy(BLASLONG m, BLASLONG n, xdouble *a, BLASLONG lda, xdouble *b, BLASLONG ldb, xdouble *c); +int domatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); +int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); +int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); +int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); + #ifdef __CUDACC__ } #endif diff --git a/common_macro.h b/common_macro.h index dc6f5ebb3..1a08613ca 100644 --- a/common_macro.h +++ b/common_macro.h @@ -629,6 +629,10 @@ #define HERK_THREAD_LC DSYRK_THREAD_LT #define AXPBY_K DAXPBY_K +#define OMATCOPY_K_CN DOMATCOPY_K_CN +#define OMATCOPY_K_RN DOMATCOPY_K_RN +#define OMATCOPY_K_CT DOMATCOPY_K_CT +#define OMATCOPY_K_RT DOMATCOPY_K_RT #else diff --git a/common_param.h b/common_param.h index 1f2f2e072..1b7d8a5a7 100644 --- a/common_param.h +++ b/common_param.h @@ -816,6 +816,12 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); int (*caxpby_k) (BLASLONG, 
float, float, float*, BLASLONG,float,float, float*, BLASLONG); int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG); + int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); + int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); + int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); + int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); + + } gotoblas_t; extern gotoblas_t *gotoblas; diff --git a/interface/Makefile b/interface/Makefile index 963e8b68e..aaae70271 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -63,7 +63,8 @@ DBLAS2OBJS = \ DBLAS3OBJS = \ dgemm.$(SUFFIX) dsymm.$(SUFFIX) dtrmm.$(SUFFIX) \ - dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) + dtrsm.$(SUFFIX) dsyrk.$(SUFFIX) dsyr2k.$(SUFFIX) \ + domatcopy.$(SUFFIX) CBLAS1OBJS = \ caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \ @@ -2024,5 +2025,6 @@ caxpby.$(SUFFIX) caxpby.$(PSUFFIX) : zaxpby.c cblas_caxpby.$(SUFFIX) cblas_caxpby.$(PSUFFIX) : zaxpby.c $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) - +domatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : omatcopy.c ../param.h + $(CC) -c $(CFLAGS) $< -o $(@F) diff --git a/interface/omatcopy.c b/interface/omatcopy.c new file mode 100644 index 000000000..1727e04ef --- /dev/null +++ b/interface/omatcopy.c @@ -0,0 +1,120 @@ +/*************************************************************************** +Copyright (c) 2014, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +/*********************************************************** + * 2014/06/09 Saar +***********************************************************/ + +#include +#include +#include "common.h" +#ifdef FUNCTION_PROFILE +#include "functable.h" +#endif + +#if defined(DOUBLE) +#define ERROR_NAME "DOMATCOPY" +#else +#define ERROR_NAME "SOMATCOPY" +#endif + +#define BlasRowMajor 0 +#define BlasColMajor 1 +#define BlasNoTrans 0 +#define BlasTrans 1 + +/* Interface routine for the omatcopy BLAS extension: every argument is taken by pointer, the arguments are validated, and the work is handed to exactly one of the OMATCOPY_K_CN, OMATCOPY_K_CT, OMATCOPY_K_RN or OMATCOPY_K_RT kernels. ORDER selects the storage order after the TOUPPER() macro (defined outside this file, presumably upper-casing) is applied: C gives BlasColMajor, R gives BlasRowMajor. TRANS selects the kernel variant the same way: N or R gives BlasNoTrans, T or C gives BlasTrans. rows, cols, alpha, lda and ldb are dereferenced and forwarded unchanged to the chosen kernel together with the a and b pointers. If a check fails, xerbla is called with ERROR_NAME (DOMATCOPY when DOUBLE is defined, SOMATCOPY otherwise) and the 1-based position of the offending argument, and the routine returns without calling any kernel: 1 = ORDER not recognised, 2 = TRANS not recognised, 3 = rows <= 0, 4 = cols <= 0, 7 = lda smaller than rows (column major) or cols (row major), 9 = ldb smaller than rows or cols as implied by ORDER and TRANS. Note that rows == 0 or cols == 0 is reported as an error here rather than treated as an empty no-op. NOTE(review): the two bare #include directives near the top of this file carry no header name; presumably the angle-bracket names were lost in extraction - confirm against the original file. */ void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb) +{ + + char Order, Trans; + int order=-1,trans=-1; + blasint info = -1; /* stays negative while every check passes */ + + Order = *ORDER; + Trans = *TRANS; + + TOUPPER(Order); + TOUPPER(Trans); + + if ( Order == 'C' ) order = BlasColMajor; + if ( Order == 'R' ) order = BlasRowMajor; + if ( Trans == 'N' ) trans = BlasNoTrans; + if ( Trans == 'R' ) trans = BlasNoTrans; + if ( Trans == 'T' ) trans = BlasTrans; + if ( Trans == 'C' ) trans = BlasTrans; + + /* The checks run from the highest argument position down and each assignment overwrites the previous one, so info ends up naming the lowest-numbered failing argument. */ if ( order == BlasColMajor) + { + if ( trans == BlasNoTrans && *ldb < *rows ) info = 9; + if ( trans == BlasTrans && *ldb < *cols ) info = 9; + } + if ( order == BlasRowMajor) + { + if ( trans == BlasNoTrans && *ldb < *cols ) info = 9; + if ( trans == BlasTrans && *ldb < *rows ) info = 9; + } + + if ( order == BlasColMajor && *lda < *rows ) info = 7; + if ( order == BlasRowMajor && *lda < *cols ) info = 7; + if ( *cols <= 0 ) info = 4; + if ( *rows <= 0 ) info = 3; + if ( trans < 0 ) info = 2; + if ( order < 0 ) info = 1; + + if (info >= 0) { + BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME)); + return; + } + + /* All arguments accepted: choose the kernel from the (order, trans) pair. */ if ( order == BlasColMajor ) + { + if ( trans == BlasNoTrans ) + { + OMATCOPY_K_CN(*rows, *cols, *alpha, a, *lda, b, *ldb ); + } + else + { + OMATCOPY_K_CT(*rows, *cols, *alpha, a, *lda, b, *ldb ); + } + } + else + { + if ( trans == BlasNoTrans ) + { + OMATCOPY_K_RN(*rows, *cols, *alpha, a, *lda, b, *ldb ); 
+ } + else + { + OMATCOPY_K_RT(*rows, *cols, *alpha, a, *lda, b, *ldb ); + } + } + + return; + +} + + diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index b9b4bef1e..552773f2d 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -320,6 +320,13 @@ XBLASOBJS += \ endif +###### BLAS extensions ##### + +DBLASOBJS += \ + domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ + domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) + + SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) SGEMMITCOPYOBJ_P = $(SGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) SGEMMONCOPYOBJ_P = $(SGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) @@ -3237,3 +3244,40 @@ $(KDIR)xtrsm_oltucopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL $(KDIR)xtrsm_oltncopy$(TSUFFIX).$(PSUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_N).c $(CC) -c $(PFLAGS) $(NO_UNINITIALIZED_WARN) -DXDOUBLE -DCOMPLEX -DOUTER -DLOWER -UUNIT $< -o $@ + + +##### BLAS extensions ###### + +ifndef DOMATCOPY_CN +DOMATCOPY_CN = ../arm/omatcopy_cn.c +endif + +$(KDIR)domatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_CN) + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ + +ifndef DOMATCOPY_RN +DOMATCOPY_RN = ../arm/omatcopy_rn.c +endif + +$(KDIR)domatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_RN) + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ + +ifndef DOMATCOPY_CT +DOMATCOPY_CT = ../arm/omatcopy_ct.c +endif + +$(KDIR)domatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_CT) + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ + +ifndef DOMATCOPY_RT +DOMATCOPY_RT = ../arm/omatcopy_rt.c +endif + +$(KDIR)domatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_RT) + $(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@ + + + + + + diff --git a/kernel/arm/omatcopy_cn.c b/kernel/arm/omatcopy_cn.c new file mode 100644 index 000000000..e46ddaede --- /dev/null +++ b/kernel/arm/omatcopy_cn.c @@ -0,0 +1,90 @@ 
+/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + +#include "common.h" + +/***************************************************** + * 2014/06/09 Saar + * + * Order ColMajor + * No Trans + * +******************************************************/ + +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) +{ + BLASLONG i,j; + FLOAT *aptr,*bptr; + + if ( rows <= 0 ) return(0); + if ( cols <= 0 ) return(0); + + aptr = a; + bptr = b; + + if ( alpha == 0.0 ) + { + for ( i=0; i