Provide iaxpy and cblas_iaxpy for integer vectors. Enable them by building with `make INTEGER_PRECISION=1`.

This commit is contained in:
Zhang Xianyi 2015-07-01 03:11:27 +08:00
parent 3f1b57668e
commit 034ffa93fa
14 changed files with 150 additions and 4 deletions

View File

@ -114,6 +114,9 @@ NO_AFFINITY = 1
# Support for IEEE quad precision (it's *real* REAL*16) (under testing)
# QUAD_PRECISION = 1
# Support for integer matrix and vector (e.g. iaxpy)
# INTEGER_PRECISION = 1
# Threads are still working for a while after finishing BLAS operation
# to reduce thread activate/deactivate overhead. You can determine
# time out to improve performance. This number should be from 4 to 30

View File

@ -309,6 +309,10 @@ CCOMMON_OPT += -DQUAD_PRECISION
NO_EXPRECISION = 1
endif
# When the INTEGER_PRECISION build option is set, pass it on to the C sources
# as the preprocessor macro INTEGER_PRECISION via the common compiler options.
ifdef INTEGER_PRECISION
CCOMMON_OPT += -DINTEGER_PRECISION
endif
ifneq ($(ARCH), x86)
ifneq ($(ARCH), x86_64)
NO_EXPRECISION = 1

View File

@ -4,6 +4,7 @@ QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
IBLASOBJS_P = $(IBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
@ -22,12 +23,18 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
endif
# Integer objects (and their profiled counterparts) join the overall object
# lists only when INTEGER_PRECISION is set.
ifdef INTEGER_PRECISION
BLASOBJS += $(IBLASOBJS)
BLASOBJS_P += $(IBLASOBJS_P)
endif
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
$(IBLASOBJS) $(IBLASOBJS_P) : override CFLAGS += -DINTEGER -UCOMPLEX
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
@ -35,6 +42,7 @@ $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(IBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
libs :: $(BLASOBJS) $(COMMONOBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

View File

@ -276,6 +276,11 @@ typedef int blasint;
#define SIZE 8
#define BASE_SHIFT 3
#define ZBASE_SHIFT 4
#elif defined(INTEGER) //extend for integer matrix
#define FLOAT int
#define SIZE 4
#define BASE_SHIFT 2
#define ZBASE_SHIFT 3
#else
#define FLOAT float
#define SIZE 4

9
common_i.h Normal file
View File

@ -0,0 +1,9 @@
/*
 * Kernel-name mapping for the integer routines.
 *
 * Without DYNAMIC_ARCH, IAXPYU_K expands directly to the iaxpy_k symbol.
 * No mapping is provided for DYNAMIC_ARCH builds, so that configuration is
 * rejected at compile time with an explanatory message.
 */
#ifndef COMMON_I_H
#define COMMON_I_H

#ifndef DYNAMIC_ARCH
#define IAXPYU_K iaxpy_k
#else
#error "common_i.h: the integer kernels (IAXPYU_K) have no DYNAMIC_ARCH mapping"
#endif

#endif

View File

@ -93,6 +93,7 @@ openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdo
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
void BLASFUNC(iaxpy) (blasint *, int *, int *, blasint *, int *, blasint *);
void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *);
void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *);

View File

@ -60,6 +60,8 @@ int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double,
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int iaxpy_k (BLASLONG, BLASLONG, BLASLONG, int,
int *, BLASLONG, int *, BLASLONG, int *, BLASLONG);
int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float,
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double,

View File

@ -47,6 +47,10 @@
#include "common_z.h"
#include "common_x.h"
#ifdef INTEGER_PRECISION
#include "common_i.h"
#endif
#ifndef COMPLEX
#ifdef XDOUBLE
@ -635,6 +639,9 @@
#define OMATCOPY_K_CT DOMATCOPY_K_CT
#define OMATCOPY_K_RT DOMATCOPY_K_RT
#define GEADD_K DGEADD_K
#elif defined(INTEGER)
#define AXPYU_K IAXPYU_K
#else
#define AMAX_K SAMAX_K

View File

@ -65,6 +65,7 @@ extern int blas_omp_linked;
#define BLAS_XDOUBLE 0x0002U
#define BLAS_REAL 0x0000U
#define BLAS_COMPLEX 0x0004U
#define BLAS_INTEGER 0x0008U
#define BLAS_TRANSA 0x0030U /* 2bit */
#define BLAS_TRANSA_N 0x0000U

View File

@ -189,6 +189,20 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
args -> b, args -> ldb,
args -> c, args -> ldc, sb);
} else
#endif
#ifdef INTEGER_PRECISION
if (mode & BLAS_INTEGER){
/* REAL / Integer */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, int,
int *, BLASLONG, int *, BLASLONG,
int *, BLASLONG, void *) = func;
afunc(args -> m, args -> n, args -> k,
((int *)args -> alpha)[0],
args -> a, args -> lda,
args -> b, args -> ldb,
args -> c, args -> ldc, sb);
} else
#endif
if (mode & BLAS_DOUBLE){
/* REAL / Double */

View File

@ -253,6 +253,15 @@ XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)
endif
endif
# Initial integer object lists, defined only when INTEGER_PRECISION is set:
# iaxpy is the sole level-1 entry; the level-2 and level-3 lists start empty.
ifdef INTEGER_PRECISION
IBLAS1OBJS = \
iaxpy.$(SUFFIX)
IBLAS2OBJS =
IBLAS3OBJS =
endif
endif
HPLOBJS = dgemm.$(SUFFIX) dtrsm.$(SUFFIX) \
@ -343,6 +352,9 @@ CZBLAS3OBJS = \
cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \
cblas_zgeadd.$(SUFFIX)
CIBLAS1OBJS = \
cblas_iaxpy.$(SUFFIX)
ifeq ($(SUPPORT_GEMM3M), 1)
@ -372,6 +384,10 @@ ZBLAS1OBJS += $(CZBLAS1OBJS)
ZBLAS2OBJS += $(CZBLAS2OBJS)
ZBLAS3OBJS += $(CZBLAS3OBJS)
IBLAS1OBJS += $(CIBLAS1OBJS)
IBLAS2OBJS += $(CIBLAS2OBJS)
IBLAS3OBJS += $(CIBLAS3OBJS)
endif
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
@ -380,6 +396,7 @@ QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS)
CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
IBLASOBJS = $(IBLAS1OBJS) $(IBLAS2OBJS) $(IBLAS3OBJS)
#SLAPACKOBJS = \
# sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
@ -458,6 +475,10 @@ ifdef QUAD_PRECISION
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
endif
# Include the integer objects in FUNCOBJS only when INTEGER_PRECISION is set.
ifdef INTEGER_PRECISION
FUNCOBJS += $(IBLASOBJS)
endif
FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=)
include $(TOPDIR)/Makefile.tail
@ -476,17 +497,18 @@ endif
clean ::
@rm -f functable.h
level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS)
level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) $(IBLAS1OBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) $(IBLAS2OBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS)
level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) $(IBLAS3OBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
$(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) \
$(CIBLASOBJS) $(CIBLASOBJS_P) : override CFLAGS += -DCBLAS
srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c
$(CC) $(CFLAGS) -c $< -o $(@F)
@ -725,6 +747,9 @@ saxpy.$(SUFFIX) saxpy.$(PSUFFIX) : axpy.c
daxpy.$(SUFFIX) daxpy.$(PSUFFIX) : axpy.c
$(CC) $(CFLAGS) -c $< -o $(@F)
iaxpy.$(SUFFIX) iaxpy.$(PSUFFIX) : axpy.c
$(CC) $(CFLAGS) -c $< -o $(@F)
qaxpy.$(SUFFIX) qaxpy.$(PSUFFIX) : axpy.c
$(CC) $(CFLAGS) -c $< -o $(@F)
@ -1437,6 +1462,9 @@ cblas_saxpy.$(SUFFIX) cblas_saxpy.$(PSUFFIX) : axpy.c
cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
cblas_iaxpy.$(SUFFIX) cblas_iaxpy.$(PSUFFIX) : axpy.c
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)

View File

@ -103,6 +103,8 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
mode = BLAS_XDOUBLE | BLAS_REAL;
#elif defined(DOUBLE)
mode = BLAS_DOUBLE | BLAS_REAL;
#elif defined(INTEGER)
mode = BLAS_INTEGER | BLAS_REAL;
#else
mode = BLAS_SINGLE | BLAS_REAL;
#endif

View File

@ -210,6 +210,10 @@ ifndef XAXPYKERNEL
XAXPYKERNEL = zaxpy.S
endif
# Default the integer AXPY kernel source to the generic C implementation
# unless IAXPYKERNEL has already been given a (non-empty) value.
ifndef IAXPYKERNEL
IAXPYKERNEL = ../generic/iaxpy.c
endif
### COPY ###
ifndef SCOPYKERNEL
@ -471,6 +475,9 @@ QBLASOBJS += \
qasum_k$(TSUFFIX).$(SUFFIX) qaxpy_k$(TSUFFIX).$(SUFFIX) qcopy_k$(TSUFFIX).$(SUFFIX) qdot_k$(TSUFFIX).$(SUFFIX) \
qnrm2_k$(TSUFFIX).$(SUFFIX) qrot_k$(TSUFFIX).$(SUFFIX) qscal_k$(TSUFFIX).$(SUFFIX) qswap_k$(TSUFFIX).$(SUFFIX)
IBLASOBJS += \
iaxpy_k$(TSUFFIX).$(SUFFIX)
CBLASOBJS += \
camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \
casum_k$(TSUFFIX).$(SUFFIX) caxpy_k$(TSUFFIX).$(SUFFIX) caxpyc_k$(TSUFFIX).$(SUFFIX) ccopy_k$(TSUFFIX).$(SUFFIX) \
@ -645,6 +652,9 @@ $(KDIR)daxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)daxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KE
$(KDIR)qaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)qaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QAXPYKERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@
$(KDIR)iaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)iaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(IAXPYKERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DINTEGER $< -o $@
$(KDIR)caxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)caxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CAXPYKERNEL)
$(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UCONJ -UDOUBLE $< -o $@

52
kernel/generic/iaxpy.c Normal file
View File

@ -0,0 +1,52 @@
/***************************************************************************
Copyright (c) 2015, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "common.h"
/*
 * Generic integer AXPY kernel: for i in [0, n), y[i * inc_y] += da * x[i * inc_x].
 *
 * n       number of elements to update; nothing is done when n <= 0
 * da      integer scale factor; when it is 0 the function returns without touching y
 * x       source vector, read with stride inc_x
 * y       destination vector, updated in place with stride inc_y
 * dummy0, dummy1, dummy, dummy2
 *         not used by this kernel (presumably kept so the signature lines up
 *         with the other *axpy_k kernels -- confirm against the callers)
 *
 * Always returns 0.
 *
 * The multiply-add is carried out in unsigned int so that results exceeding
 * the int range wrap modulo 2^N instead of triggering the undefined behaviour
 * of signed overflow. Whenever no overflow occurs the result is identical to
 * plain signed arithmetic.
 */
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, int da, int *x, BLASLONG inc_x, int *y, BLASLONG inc_y, int *dummy, BLASLONG dummy2)
{
	BLASLONG i;
	BLASLONG ix = 0;
	BLASLONG iy = 0;

	if (n <= 0) return 0;
	if (da == 0) return 0;

	for (i = 0; i < n; i++) {
		/* Unsigned arithmetic is defined to wrap; the conversion back to int
		   is implementation-defined (modulo 2^N on GCC/Clang/MSVC), never UB. */
		unsigned int acc = (unsigned int)y[iy] + (unsigned int)da * (unsigned int)x[ix];

		y[iy] = (int)acc;
		ix += inc_x;
		iy += inc_y;
	}

	return 0;
}