# common_c.h: adds the eight COMATCOPY_K_* macros twice -- as the plain
# comatcopy_k_* symbols in the first branch and as members reached through
# the gotoblas pointer in the #else branch.
diff --git a/common_c.h b/common_c.h
index c0ad335e0..724d1e261 100644
--- a/common_c.h
+++ b/common_c.h
@@ -211,6 +211,16 @@
 
 #define CAXPBY_K caxpby_k
 
+#define COMATCOPY_K_CN comatcopy_k_cn
+#define COMATCOPY_K_RN comatcopy_k_rn
+#define COMATCOPY_K_CT comatcopy_k_ct
+#define COMATCOPY_K_RT comatcopy_k_rt
+#define COMATCOPY_K_CNC comatcopy_k_cnc
+#define COMATCOPY_K_RNC comatcopy_k_rnc
+#define COMATCOPY_K_CTC comatcopy_k_ctc
+#define COMATCOPY_K_RTC comatcopy_k_rtc
+
+
 #else
 
 #define CAMAX_K gotoblas -> camax_k
@@ -383,6 +393,16 @@
 #define CLASWP_NCOPY gotoblas -> claswp_ncopy
 
 #define CAXPBY_K gotoblas -> caxpby_k
+
+#define COMATCOPY_K_CN gotoblas -> comatcopy_k_cn
+#define COMATCOPY_K_RN gotoblas -> comatcopy_k_rn
+#define COMATCOPY_K_CT gotoblas -> comatcopy_k_ct
+#define COMATCOPY_K_RT gotoblas -> comatcopy_k_rt
+#define COMATCOPY_K_CNC gotoblas -> comatcopy_k_cnc
+#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc
+#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc
+#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc
+
 #endif
 
 #define CGEMM_NN cgemm_nn
# common_level3.h: declares the eight comatcopy_k_* (float) and the eight
# zomatcopy_k_* (double) kernels. Unlike the domatcopy_k_* prototypes in the
# context lines, each takes two scalars ahead of the source pointer.
diff --git a/common_level3.h b/common_level3.h
index ad978b03c..7e1756e67 100644
--- a/common_level3.h
+++ b/common_level3.h
@@ -1742,6 +1742,27 @@ int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, B
 int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
 int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG);
 
+int comatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int comatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int comatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int comatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+
+int comatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int comatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int comatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+int comatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG);
+
+int zomatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zomatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zomatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zomatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+
+int zomatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
+
+
 #ifdef __CUDACC__
 }
 #endif
# common_macro.h: maps the precision-neutral OMATCOPY_K_* names onto the
# ZOMATCOPY_K_* macros in one branch and onto the COMATCOPY_K_* macros in the
# following #else branch.
diff --git a/common_macro.h b/common_macro.h
index b93842fbe..f9de3773a 100644
--- a/common_macro.h
+++ b/common_macro.h
@@ -1738,6 +1738,15 @@
 
 #define AXPBY_K ZAXPBY_K
 
+#define OMATCOPY_K_CN ZOMATCOPY_K_CN
+#define OMATCOPY_K_RN ZOMATCOPY_K_RN
+#define OMATCOPY_K_CT ZOMATCOPY_K_CT
+#define OMATCOPY_K_RT ZOMATCOPY_K_RT
+#define OMATCOPY_K_CNC ZOMATCOPY_K_CNC
+#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC
+#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC
+#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC
+
 #else
 
 #define AMAX_K CAMAX_K
@@ -2141,6 +2150,15 @@
 
 #define AXPBY_K CAXPBY_K
 
+#define OMATCOPY_K_CN COMATCOPY_K_CN
+#define OMATCOPY_K_RN COMATCOPY_K_RN
+#define OMATCOPY_K_CT COMATCOPY_K_CT
+#define OMATCOPY_K_RT COMATCOPY_K_RT
+#define OMATCOPY_K_CNC COMATCOPY_K_CNC
+#define OMATCOPY_K_RNC COMATCOPY_K_RNC
+#define OMATCOPY_K_CTC COMATCOPY_K_CTC
+#define OMATCOPY_K_RTC COMATCOPY_K_RTC
+
 #endif
 
 #endif
# common_param.h: appends one function pointer per new kernel to the end of
# the gotoblas_t struct; these are the members the "gotoblas ->" macros name.
# NOTE(review): the members are listed cn, ct, rn, rt while the prototypes are
# listed cn, rn, ct, rt -- confirm any positional initializer of gotoblas_t
# follows the struct order.
diff --git a/common_param.h b/common_param.h
index 6c98f6c09..14dbc7e7d 100644
--- a/common_param.h
+++ b/common_param.h
@@ -826,6 +826,26 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
  int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
  int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
 
+ int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
+ int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
+ int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
+ int (*comatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
+
+ int (*comatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
+ int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
+ int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
+ int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
+
+ int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
+ int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
+ int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
+ int (*zomatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
+
+ int (*zomatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
+ int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
+ int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
+ int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
+
 } gotoblas_t;
 
 
diff --git a/common_z.h b/common_z.h
index 15761f6ed..133dea80c 100644
--- 
a/common_z.h
+++ b/common_z.h
@@ -211,6 +211,16 @@
 
 #define ZAXPBY_K zaxpby_k
 
+#define ZOMATCOPY_K_CN zomatcopy_k_cn
+#define ZOMATCOPY_K_RN zomatcopy_k_rn
+#define ZOMATCOPY_K_CT zomatcopy_k_ct
+#define ZOMATCOPY_K_RT zomatcopy_k_rt
+#define ZOMATCOPY_K_CNC zomatcopy_k_cnc
+#define ZOMATCOPY_K_RNC zomatcopy_k_rnc
+#define ZOMATCOPY_K_CTC zomatcopy_k_ctc
+#define ZOMATCOPY_K_RTC zomatcopy_k_rtc
+
+
 #else
 
 #define ZAMAX_K gotoblas -> zamax_k
@@ -383,6 +393,16 @@
 #define ZLASWP_NCOPY gotoblas -> zlaswp_ncopy
 
 #define ZAXPBY_K gotoblas -> zaxpby_k
+
+#define ZOMATCOPY_K_CN gotoblas -> zomatcopy_k_cn
+#define ZOMATCOPY_K_RN gotoblas -> zomatcopy_k_rn
+#define ZOMATCOPY_K_CT gotoblas -> zomatcopy_k_ct
+#define ZOMATCOPY_K_RT gotoblas -> zomatcopy_k_rt
+#define ZOMATCOPY_K_CNC gotoblas -> zomatcopy_k_cnc
+#define ZOMATCOPY_K_RNC gotoblas -> zomatcopy_k_rnc
+#define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc
+#define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc
+
 #endif
 
 #define ZGEMM_NN zgemm_nn
# interface/Makefile: adds comatcopy and zomatcopy to the complex level-3
# object lists and builds both from zomatcopy.c. Each rule names its own
# object under both $(SUFFIX) and $(PSUFFIX), so the profiled objects have a
# rule and no target is given two recipes by these additions.
diff --git a/interface/Makefile b/interface/Makefile
index 0d302eefb..da7d11dd4 100644
--- a/interface/Makefile
+++ b/interface/Makefile
@@ -93,7 +93,8 @@ CBLAS2OBJS = \
 CBLAS3OBJS = \
 	cgemm.$(SUFFIX) csymm.$(SUFFIX) ctrmm.$(SUFFIX) \
 	ctrsm.$(SUFFIX) csyrk.$(SUFFIX) csyr2k.$(SUFFIX) \
-	chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX)
+	chemm.$(SUFFIX) cherk.$(SUFFIX) cher2k.$(SUFFIX) \
+	comatcopy.$(SUFFIX)
 
 ZBLAS1OBJS = \
 	zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \
@@ -120,7 +121,8 @@ ZBLAS2OBJS = \
 ZBLAS3OBJS = \
 	zgemm.$(SUFFIX) zsymm.$(SUFFIX) ztrmm.$(SUFFIX) \
 	ztrsm.$(SUFFIX) zsyrk.$(SUFFIX) zsyr2k.$(SUFFIX) \
-	zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX)
+	zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
+	zomatcopy.$(SUFFIX)
 
 ifdef SUPPORT_GEMM3M
 
@@ -2033,3 +2035,10 @@ domatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : omatcopy.c
 somatcopy.$(SUFFIX) domatcopy.$(PSUFFIX) : omatcopy.c
 	$(CC) -c $(CFLAGS) $< -o $(@F)
 
+comatcopy.$(SUFFIX) comatcopy.$(PSUFFIX) : zomatcopy.c
+	$(CC) -c $(CFLAGS) $< -o $(@F)
+
+zomatcopy.$(SUFFIX) zomatcopy.$(PSUFFIX) : zomatcopy.c
+	$(CC) -c $(CFLAGS) $< -o $(@F)
+
+
diff --git a/interface/zomatcopy.c b/interface/zomatcopy.c
new file mode 100644
index 000000000..819e57bab
--- /dev/null
+++ b/interface/zomatcopy.c
@@ -0,0 +1,154 @@
+/***************************************************************************
+Copyright (c) 2014, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+/***********************************************************
+ * 2014/06/09 Saar
+***********************************************************/
+
+#include <stdio.h>
+#include <ctype.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#if defined(DOUBLE)
+#define ERROR_NAME "ZOMATCOPY"
+#else
+#define ERROR_NAME "COMATCOPY"
+#endif
+
+#define BlasRowMajor 0
+#define BlasColMajor 1
+#define BlasNoTrans 0
+#define BlasTrans 1
+#define BlasTransConj 2
+#define BlasConj 3
+
+/*
+ * Fortran-callable front end of the complex out-of-place matrix copy.
+ * Checks the arguments, then forwards to one of the OMATCOPY_K_* kernels.
+ *
+ * ORDER  'C' picks the column-major kernels, 'R' the row-major ones.
+ * TRANS  'N', 'T', 'C', 'R' pick the N, T, TC and NC kernel variants.
+ * alpha  two FLOATs, handed to the kernel as alpha[0] and alpha[1].
+ * A rejected argument is reported through xerbla and no kernel is called.
+ */
+void NAME( char* ORDER, char* TRANS, blasint *rows, blasint *cols, FLOAT *alpha, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb)
+{
+	char Order, Trans;
+	int order=-1,trans=-1;
+	blasint info = -1;
+
+	Order = *ORDER;
+	Trans = *TRANS;
+
+	TOUPPER(Order);
+	TOUPPER(Trans);
+
+	if ( Order == 'C' ) order = BlasColMajor;
+	if ( Order == 'R' ) order = BlasRowMajor;
+	if ( Trans == 'N' ) trans = BlasNoTrans;
+	if ( Trans == 'T' ) trans = BlasTrans;
+	if ( Trans == 'C' ) trans = BlasTransConj;
+	if ( Trans == 'R' ) trans = BlasConj;
+	/* ldb is compared with rows or cols depending on order and trans */
+	if ( order == BlasColMajor)
+	{
+		if ( trans == BlasNoTrans && *ldb < *rows ) info = 9;
+		if ( trans == BlasConj && *ldb < *rows ) info = 9;
+		if ( trans == BlasTrans && *ldb < *cols ) info = 9;
+		if ( trans == BlasTransConj && *ldb < *cols ) info = 9;
+	}
+	if ( order == BlasRowMajor)
+	{
+		if ( trans == BlasNoTrans && *ldb < *cols ) info = 9;
+		if ( trans == BlasConj && *ldb < *cols ) info = 9;
+		if ( trans == BlasTrans && *ldb < *rows ) info = 9;
+		if ( trans == BlasTransConj && *ldb < *rows ) info = 9;
+	}
+	/* later checks overwrite info, so the lowest failing position wins */
+	if ( order == BlasColMajor && *lda < *rows ) info = 7;
+	if ( order == BlasRowMajor && *lda < *cols ) info = 7;
+	if ( *cols <= 0 ) info = 4;
+	if ( *rows <= 0 ) info = 3;
+	if ( trans < 0 ) info = 2;
+	if ( order < 0 ) info = 1;
+
+	if (info >= 0) {
+		BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
+		return;
+	}
+
+	if ( order == BlasColMajor )
+	{
+		if ( trans == BlasNoTrans )
+		{
+			OMATCOPY_K_CN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			return;
+		}
+		if ( trans == BlasConj )
+		{
+			OMATCOPY_K_CNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			return;
+		}
+		if ( trans == BlasTrans )
+		{
+			OMATCOPY_K_CT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			return;
+		}
+		if ( trans == BlasTransConj )
+		{
+			OMATCOPY_K_CTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			return;
+		}
+	}
+	else
+	{
+		if ( trans == BlasNoTrans )
+		{
+			OMATCOPY_K_RN(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			return;
+		}
+		if ( trans == BlasConj )
+		{
+			OMATCOPY_K_RNC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			return;
+		}
+		if ( trans == BlasTrans )
+		{
+			OMATCOPY_K_RT(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			return;
+		}
+		if ( trans == BlasTransConj )
+		{
+			OMATCOPY_K_RTC(*rows, *cols, alpha[0], alpha[1], a, *lda, b, *ldb );
+			return;
+		}
+	}
+	return;
+}
# kernel/Makefile.L3: adds the comatcopy/zomatcopy kernel objects to CBLASOBJS
# and ZBLASOBJS. The build rules in the next hunk compile them with -DCOMPLEX,
# with -DROWM for the row-major kernels and -DCONJ for the conjugating ones.
diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3
index b03d2b9d3..76857a2ba 100644
--- a/kernel/Makefile.L3
+++ b/kernel/Makefile.L3
@@ -329,6 +329,18 @@ DBLASOBJS += \
 	domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
 	domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX)
 
+CBLASOBJS += \
+	comatcopy_k_cn$(TSUFFIX).$(SUFFIX) comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
+	comatcopy_k_ct$(TSUFFIX).$(SUFFIX) comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
+	comatcopy_k_cnc$(TSUFFIX).$(SUFFIX) comatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \
+	comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) comatcopy_k_rtc$(TSUFFIX).$(SUFFIX)
+
+ZBLASOBJS += \
+	zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
+	zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
+	zomatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \
+	zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX)
+
 
 SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
SGEMMITCOPYOBJ_P = $(SGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
@@ -3256,61 +3268,169 @@ DOMATCOPY_CN = ../arm/omatcopy_cn.c
 endif
 
 $(KDIR)domatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_CN)
-	$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@
+	$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@
 
 ifndef DOMATCOPY_RN
 DOMATCOPY_RN = ../arm/omatcopy_rn.c
 endif
 
 $(KDIR)domatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_RN)
-	$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@
+	$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DROWM $< -o $@
 
 ifndef DOMATCOPY_CT
 DOMATCOPY_CT = ../arm/omatcopy_ct.c
 endif
 
 $(KDIR)domatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_CT)
-	$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@
+	$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -UROWM $< -o $@
 
 ifndef DOMATCOPY_RT
 DOMATCOPY_RT = ../arm/omatcopy_rt.c
 endif
 
 $(KDIR)domatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(DOMATCOPY_RT)
-	$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX $< -o $@
+	$(CC) $(CFLAGS) -c -DDOUBLE -UCOMPLEX -DROWM $< -o $@
 
 ifndef SOMATCOPY_CN
 SOMATCOPY_CN = ../arm/omatcopy_cn.c
 endif
 
 $(KDIR)somatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_CN)
-	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
+	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@
 
 ifndef SOMATCOPY_RN
 SOMATCOPY_RN = ../arm/omatcopy_rn.c
 endif
 
 $(KDIR)somatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_RN)
-	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
+	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DROWM $< -o $@
 
 ifndef SOMATCOPY_CT
 SOMATCOPY_CT = ../arm/omatcopy_ct.c
 endif
 
 $(KDIR)somatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_CT)
-	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
+	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -UROWM $< -o $@
 
 ifndef SOMATCOPY_RT
 SOMATCOPY_RT = ../arm/omatcopy_rt.c
 endif
 
 $(KDIR)somatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SOMATCOPY_RT)
-	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
-
-
+	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DROWM $< -o $@
+ifndef COMATCOPY_CN
+COMATCOPY_CN = ../arm/zomatcopy_cn.c
+endif
+
+$(KDIR)comatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CN)
+	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@
+
+ifndef COMATCOPY_RN
+COMATCOPY_RN = ../arm/zomatcopy_rn.c
+endif
+
+$(KDIR)comatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RN)
+	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@
+
+ifndef COMATCOPY_CT
+COMATCOPY_CT = ../arm/zomatcopy_ct.c
+endif
+
+$(KDIR)comatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CT)
+	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@
+
+ifndef COMATCOPY_RT
+COMATCOPY_RT = ../arm/zomatcopy_rt.c
+endif
+
+$(KDIR)comatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RT)
+	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@
+
+ifndef COMATCOPY_CNC
+COMATCOPY_CNC = ../arm/zomatcopy_cnc.c
+endif
+
+$(KDIR)comatcopy_k_cnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CNC)
+	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@
+
+ifndef COMATCOPY_RNC
+COMATCOPY_RNC = ../arm/zomatcopy_rnc.c
+endif
+
+$(KDIR)comatcopy_k_rnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RNC)
+	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@
+
+ifndef COMATCOPY_CTC
+COMATCOPY_CTC = ../arm/zomatcopy_ctc.c
+endif
+
+$(KDIR)comatcopy_k_ctc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_CTC)
+	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@
+
+ifndef COMATCOPY_RTC
+COMATCOPY_RTC = ../arm/zomatcopy_rtc.c
+endif
+
+$(KDIR)comatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(COMATCOPY_RTC)
+	$(CC) $(CFLAGS) -c -UDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@
+ifndef ZOMATCOPY_CN
+ZOMATCOPY_CN = ../arm/zomatcopy_cn.c
+endif
+
+$(KDIR)zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CN)
+	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@
+
+ifndef ZOMATCOPY_RN
+ZOMATCOPY_RN = ../arm/zomatcopy_rn.c
+endif
+
+$(KDIR)zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RN)
+	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@
+
+ifndef ZOMATCOPY_CT
+ZOMATCOPY_CT = ../arm/zomatcopy_ct.c
+endif
+
+$(KDIR)zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CT)
+	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -UCONJ $< -o $@
+
+ifndef ZOMATCOPY_RT
+ZOMATCOPY_RT = ../arm/zomatcopy_rt.c
+endif
+
+$(KDIR)zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RT)
+	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -UCONJ $< -o $@
+
+ifndef ZOMATCOPY_CNC
+ZOMATCOPY_CNC = ../arm/zomatcopy_cnc.c
+endif
+
+$(KDIR)zomatcopy_k_cnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CNC)
+	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@
+
+ifndef ZOMATCOPY_RNC
+ZOMATCOPY_RNC = ../arm/zomatcopy_rnc.c
+endif
+
+$(KDIR)zomatcopy_k_rnc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RNC)
+	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@
+
+ifndef ZOMATCOPY_CTC
+ZOMATCOPY_CTC = ../arm/zomatcopy_ctc.c
+endif
+
+$(KDIR)zomatcopy_k_ctc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_CTC)
+	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -UROWM -DCONJ $< -o $@
+
+ifndef ZOMATCOPY_RTC
+ZOMATCOPY_RTC = ../arm/zomatcopy_rtc.c
+endif
+
+$(KDIR)zomatcopy_k_rtc$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZOMATCOPY_RTC)
+	$(CC) $(CFLAGS) -c -DDOUBLE -DCOMPLEX -DROWM -DCONJ $< -o $@
# kernel/arm/zomatcopy_cn.c: new file. The Makefile.L3 rules above use it as
# the default source for both COMATCOPY_CN and ZOMATCOPY_CN; the two builds
# differ only in -UDOUBLE versus -DDOUBLE.
diff --git a/kernel/arm/zomatcopy_cn.c b/kernel/arm/zomatcopy_cn.c
new file mode 100644
index 000000000..28bbb6127
--- /dev/null
+++ b/kernel/arm/zomatcopy_cn.c
@@ -0,0 +1,70 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" + +/***************************************************** + * 2014/06/09 Saar + * + * Order ColMajor + * No Trans + * +******************************************************/ + +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) +{ + BLASLONG i,j,ia; + FLOAT *aptr,*bptr; + + if ( rows <= 0 ) return(0); + if ( cols <= 0 ) return(0); + + aptr = a; + bptr = b; + + lda *= 2; + ldb *= 2; + + for ( i=0; i