diff --git a/Makefile.rule b/Makefile.rule
index 19f3fe3d9..f8632524a 100644
--- a/Makefile.rule
+++ b/Makefile.rule
@@ -114,6 +114,9 @@ NO_AFFINITY = 1
 # Support for IEEE quad precision(it's *real* REAL*16)( under testing)
 # QUAD_PRECISION = 1
 
+# Support for integer matrix and vector (e.g. iaxpy)
+# INTEGER_PRECISION = 1
+
 # Theads are still working for a while after finishing BLAS operation
 # to reduce thread activate/deactivate overhead. You can determine
 # time out to improve performance. This number should be from 4 to 30
diff --git a/Makefile.system b/Makefile.system
index 5c3910989..d9d7c80f6 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -309,6 +309,10 @@ CCOMMON_OPT += -DQUAD_PRECISION
 NO_EXPRECISION = 1
 endif
 
+ifdef INTEGER_PRECISION
+CCOMMON_OPT += -DINTEGER_PRECISION
+endif
+
 ifneq ($(ARCH), x86)
 ifneq ($(ARCH), x86_64)
 NO_EXPRECISION = 1
diff --git a/Makefile.tail b/Makefile.tail
index 2adede1a5..5d79d74b9 100644
--- a/Makefile.tail
+++ b/Makefile.tail
@@ -4,6 +4,7 @@ QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
 CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
 ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
 XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
+IBLASOBJS_P = $(IBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
 
 COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
 
@@ -22,12 +23,18 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
 BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
 endif
 
+ifdef INTEGER_PRECISION
+BLASOBJS += $(IBLASOBJS)
+BLASOBJS_P += $(IBLASOBJS_P)
+endif
+
 $(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
 $(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
 $(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
 $(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
 $(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
 $(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
+$(IBLASOBJS) $(IBLASOBJS_P) : override CFLAGS += -DINTEGER -UCOMPLEX
 
 $(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
 $(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
@@ -35,6 +42,7 @@ $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
 $(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
 $(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
 $(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
+$(IBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
 
 libs :: $(BLASOBJS) $(COMMONOBJS)
 	$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
diff --git a/common.h b/common.h
index 320adadcb..7082e1753 100644
--- a/common.h
+++ b/common.h
@@ -276,6 +276,11 @@ typedef int blasint;
 #define SIZE 8
 #define BASE_SHIFT 3
 #define ZBASE_SHIFT 4
+#elif defined(INTEGER) //extend for integer matrix
+#define FLOAT int
+#define SIZE 4
+#define BASE_SHIFT 2
+#define ZBASE_SHIFT 3
 #else
 #define FLOAT float
 #define SIZE 4
diff --git a/common_i.h b/common_i.h
new file mode 100644
index 000000000..b818dfb7f
--- /dev/null
+++ b/common_i.h
@@ -0,0 +1,13 @@
+#ifndef COMMON_I_H
+#define COMMON_I_H
+
+/* Kernel name mapping for the integer ("i") routines. */
+
+#ifndef DYNAMIC_ARCH
+#define IAXPYU_K iaxpy_k
+#else
+/* No mapping is provided for DYNAMIC_ARCH builds. */
+#error "INTEGER_PRECISION is not supported together with DYNAMIC_ARCH"
+#endif
+
+#endif /* COMMON_I_H */
diff --git a/common_interface.h b/common_interface.h
index 15f69e02f..4ab1dd39f 100644
--- a/common_interface.h
+++ b/common_interface.h
@@ -93,6 +93,7 @@ openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdo
 
 void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
 void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
+void BLASFUNC(iaxpy) (blasint *, int *, int *, blasint *, int *, blasint *);
 void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *);
 void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
 void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
diff --git a/common_level1.h b/common_level1.h
index 2a1b4f1cf..927c8d4ac 100644
--- a/common_level1.h
+++ b/common_level1.h
@@ -60,6 +60,8 @@ int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double,
 	       double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
 int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
 	       xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
+int iaxpy_k (BLASLONG, BLASLONG, BLASLONG, int,
+	       int *, BLASLONG, int *, BLASLONG, int *, BLASLONG);
 int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float,
 	       float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
 int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double,
diff --git a/common_macro.h b/common_macro.h
index 8555baa67..cd15bba79 100644
--- a/common_macro.h
+++ b/common_macro.h
@@ -47,6 +47,10 @@
 #include "common_z.h"
 #include "common_x.h"
 
+#ifdef INTEGER_PRECISION
+#include "common_i.h"
+#endif
+
 #ifndef COMPLEX
 
 #ifdef XDOUBLE
@@ -635,6 +639,9 @@
 #define OMATCOPY_K_CT DOMATCOPY_K_CT
 #define OMATCOPY_K_RT DOMATCOPY_K_RT
 #define GEADD_K DGEADD_K
+
+#elif defined(INTEGER)
+#define AXPYU_K IAXPYU_K
 #else
 
 #define AMAX_K SAMAX_K
diff --git a/common_thread.h b/common_thread.h
index bd964445e..81c23e5d7 100644
--- a/common_thread.h
+++ b/common_thread.h
@@ -65,6 +65,7 @@ extern int blas_omp_linked;
 #define BLAS_XDOUBLE 0x0002U
 #define BLAS_REAL 0x0000U
 #define BLAS_COMPLEX 0x0004U
+#define BLAS_INTEGER 0x0008U
 
 #define BLAS_TRANSA 0x0030U /* 2bit */
 #define BLAS_TRANSA_N 0x0000U
diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c
index b3b1ce7bd..eecfd0ac2 100644
--- a/driver/others/blas_server.c
+++ b/driver/others/blas_server.c
@@ -189,6 +189,20 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
 		args -> b, args -> ldb,
 		args -> c, args -> ldc, sb);
 	} else
+#endif
+#ifdef INTEGER_PRECISION
+	if (mode & BLAS_INTEGER){
+	  /* REAL / Integer */
+	  void (*afunc)(BLASLONG, BLASLONG, BLASLONG, int,
+			int *, BLASLONG, int *, BLASLONG,
+			int *, BLASLONG, void *) = func;
+
+	  afunc(args -> m, args -> n, args -> k,
+		((int *)args -> alpha)[0],
+		args -> a, args -> lda,
+		args -> b, args -> ldb,
+		args -> c, args -> ldc, sb);
+	} else
 #endif
 	  if (mode & BLAS_DOUBLE){
 	    /* REAL / Double */
diff --git a/interface/Makefile b/interface/Makefile
index 1666d9145..1075b013f 100644
--- a/interface/Makefile
+++ b/interface/Makefile
@@ -253,6 +253,15 @@ XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)
 endif
 endif
 
+ifdef INTEGER_PRECISION
+
+IBLAS1OBJS = \
+	iaxpy.$(SUFFIX)
+
+IBLAS2OBJS =
+IBLAS3OBJS =
+endif
+
 endif
 
 HPLOBJS = dgemm.$(SUFFIX) dtrsm.$(SUFFIX) \
@@ -343,6 +352,9 @@ CZBLAS3OBJS = \
 	cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \
 	cblas_zgeadd.$(SUFFIX)
 
+CIBLAS1OBJS = \
+	cblas_iaxpy.$(SUFFIX)
+
 
 ifeq ($(SUPPORT_GEMM3M), 1)
 
@@ -372,6 +384,10 @@ ZBLAS1OBJS += $(CZBLAS1OBJS)
 ZBLAS2OBJS += $(CZBLAS2OBJS)
 ZBLAS3OBJS += $(CZBLAS3OBJS)
 
+IBLAS1OBJS += $(CIBLAS1OBJS)
+IBLAS2OBJS += $(CIBLAS2OBJS)
+IBLAS3OBJS += $(CIBLAS3OBJS)
+
 endif
 
 SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
@@ -380,6 +396,7 @@ QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS)
 CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
 ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
 XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
+IBLASOBJS = $(IBLAS1OBJS) $(IBLAS2OBJS) $(IBLAS3OBJS)
 
 #SLAPACKOBJS = \
 #	sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
@@ -458,6 +475,10 @@ ifdef QUAD_PRECISION
 FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
 endif
 
+ifdef INTEGER_PRECISION
+FUNCOBJS += $(IBLASOBJS)
+endif
+
 FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=)
 
 include $(TOPDIR)/Makefile.tail
@@ -476,17 +497,18 @@ endif
 clean ::
 	@rm -f functable.h
 
-level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS)
+level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) $(IBLAS1OBJS)
 	$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
 
-level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
+level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) $(IBLAS2OBJS)
 	$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
 
-level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS)
+level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) $(IBLAS3OBJS)
 	$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
 
 $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \
-$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS
+$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) \
+$(CIBLASOBJS) $(CIBLASOBJS_P) : override CFLAGS += -DCBLAS
 
 srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c
 	$(CC) $(CFLAGS) -c $< -o $(@F)
@@ -725,6 +747,9 @@ saxpy.$(SUFFIX) saxpy.$(PSUFFIX) : axpy.c
 daxpy.$(SUFFIX) daxpy.$(PSUFFIX) : axpy.c
 	$(CC) $(CFLAGS) -c $< -o $(@F)
 
+iaxpy.$(SUFFIX) iaxpy.$(PSUFFIX) : axpy.c
+	$(CC) $(CFLAGS) -c $< -o $(@F)
+
 qaxpy.$(SUFFIX) qaxpy.$(PSUFFIX) : axpy.c
 	$(CC) $(CFLAGS) -c $< -o $(@F)
 
@@ -1437,6 +1462,9 @@ cblas_saxpy.$(SUFFIX) cblas_saxpy.$(PSUFFIX) : axpy.c
 cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c
 	$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
 
+cblas_iaxpy.$(SUFFIX) cblas_iaxpy.$(PSUFFIX) : axpy.c
+	$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
+
 cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c
 	$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
 
diff --git a/interface/axpy.c b/interface/axpy.c
index 61b7b4d78..d391f9f56 100644
--- a/interface/axpy.c
+++ b/interface/axpy.c
@@ -103,6 +103,8 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
   mode = BLAS_XDOUBLE | BLAS_REAL;
 #elif defined(DOUBLE)
   mode = BLAS_DOUBLE | BLAS_REAL;
+#elif defined(INTEGER)
+  mode = BLAS_INTEGER | BLAS_REAL;
 #else
   mode = BLAS_SINGLE | BLAS_REAL;
 #endif
diff --git a/kernel/Makefile.L1 b/kernel/Makefile.L1
index 7c7cb2770..dcc9d0d32 100644
--- a/kernel/Makefile.L1
+++ b/kernel/Makefile.L1
@@ -210,6 +210,10 @@ ifndef XAXPYKERNEL
 XAXPYKERNEL = zaxpy.S
 endif
 
+ifndef IAXPYKERNEL
+IAXPYKERNEL = ../generic/iaxpy.c
+endif
+
 ### COPY ###
 
 ifndef SCOPYKERNEL
@@ -471,6 +475,9 @@ QBLASOBJS += \
 	qasum_k$(TSUFFIX).$(SUFFIX) qaxpy_k$(TSUFFIX).$(SUFFIX) qcopy_k$(TSUFFIX).$(SUFFIX) qdot_k$(TSUFFIX).$(SUFFIX) \
 	qnrm2_k$(TSUFFIX).$(SUFFIX) qrot_k$(TSUFFIX).$(SUFFIX) qscal_k$(TSUFFIX).$(SUFFIX) qswap_k$(TSUFFIX).$(SUFFIX)
 
+IBLASOBJS += \
+	iaxpy_k$(TSUFFIX).$(SUFFIX)
+
 CBLASOBJS += \
 	camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \
 	casum_k$(TSUFFIX).$(SUFFIX) caxpy_k$(TSUFFIX).$(SUFFIX) caxpyc_k$(TSUFFIX).$(SUFFIX) ccopy_k$(TSUFFIX).$(SUFFIX) \
@@ -645,6 +652,9 @@ $(KDIR)daxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)daxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KE
 $(KDIR)qaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)qaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QAXPYKERNEL)
 	$(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@
 
+$(KDIR)iaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)iaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(IAXPYKERNEL)
+	$(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DINTEGER $< -o $@
+
 $(KDIR)caxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)caxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CAXPYKERNEL)
 	$(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UCONJ -UDOUBLE $< -o $@
 
diff --git a/kernel/generic/iaxpy.c b/kernel/generic/iaxpy.c
new file mode 100644
index 000000000..9bb7cf164
--- /dev/null
+++ b/kernel/generic/iaxpy.c
@@ -0,0 +1,63 @@
+/***************************************************************************
+Copyright (c) 2015, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+#include "common.h"
+
+/*
+ * Generic C kernel for integer AXPY: for i in [0, n),
+ *     y[i * inc_y] += da * x[i * inc_x]
+ *
+ * dummy0, dummy1, dummy and dummy2 are not used; with them the argument
+ * list matches the iaxpy_k prototype declared in common_level1.h.
+ * Nothing is written when n <= 0 or da == 0.  Always returns 0.
+ *
+ * NOTE: the arithmetic is plain signed int, so a product or sum that does
+ * not fit in an int is undefined behaviour in C.
+ */
+int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, int da, int *x, BLASLONG inc_x, int *y, BLASLONG inc_y, int *dummy, BLASLONG dummy2)
+{
+	BLASLONG i=0;
+	BLASLONG ix,iy;
+
+	if ( n < 0 ) return(0);
+	if ( da == 0 ) return(0);
+
+	ix = 0;
+	iy = 0;
+
+	while(i < n)
+	{
+
+		y[iy] += da * x[ix] ;
+		ix += inc_x ;
+		iy += inc_y ;
+		i++ ;
+
+	}
+
+	return 0;
+}