Provide iaxpy and cblas_iaxpy for integer vectors. make INTEGER_PRECISION=1
This commit is contained in:
parent
3f1b57668e
commit
034ffa93fa
|
@ -114,6 +114,9 @@ NO_AFFINITY = 1
|
||||||
# Support for IEEE quad precision (it's *real* REAL*16) (under testing)
|
# Support for IEEE quad precision (it's *real* REAL*16) (under testing)
|
||||||
# QUAD_PRECISION = 1
|
# QUAD_PRECISION = 1
|
||||||
|
|
||||||
|
# Support for integer matrices and vectors (e.g. iaxpy)
|
||||||
|
# INTEGER_PRECISION = 1
|
||||||
|
|
||||||
# Threads are still working for a while after finishing BLAS operation
|
# Threads are still working for a while after finishing BLAS operation
|
||||||
# to reduce thread activate/deactivate overhead. You can determine
|
# to reduce thread activate/deactivate overhead. You can determine
|
||||||
# time out to improve performance. This number should be from 4 to 30
|
# time out to improve performance. This number should be from 4 to 30
|
||||||
|
|
|
@ -309,6 +309,10 @@ CCOMMON_OPT += -DQUAD_PRECISION
|
||||||
NO_EXPRECISION = 1
|
NO_EXPRECISION = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifdef INTEGER_PRECISION
|
||||||
|
CCOMMON_OPT += -DINTEGER_PRECISION
|
||||||
|
endif
|
||||||
|
|
||||||
ifneq ($(ARCH), x86)
|
ifneq ($(ARCH), x86)
|
||||||
ifneq ($(ARCH), x86_64)
|
ifneq ($(ARCH), x86_64)
|
||||||
NO_EXPRECISION = 1
|
NO_EXPRECISION = 1
|
||||||
|
|
|
@ -4,6 +4,7 @@ QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
|
IBLASOBJS_P = $(IBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
|
|
||||||
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||||
|
|
||||||
|
@ -22,12 +23,18 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifdef INTEGER_PRECISION
|
||||||
|
BLASOBJS += $(IBLASOBJS)
|
||||||
|
BLASOBJS_P += $(IBLASOBJS_P)
|
||||||
|
endif
|
||||||
|
|
||||||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
||||||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
||||||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||||
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
|
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
|
||||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
||||||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||||
|
$(IBLASOBJS) $(IBLASOBJS_P) : override CFLAGS += -DINTEGER -UCOMPLEX
|
||||||
|
|
||||||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||||
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||||
|
@ -35,6 +42,7 @@ $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||||
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||||
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||||
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||||
|
$(IBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||||
|
|
||||||
libs :: $(BLASOBJS) $(COMMONOBJS)
|
libs :: $(BLASOBJS) $(COMMONOBJS)
|
||||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||||
|
|
5
common.h
5
common.h
|
@ -276,6 +276,11 @@ typedef int blasint;
|
||||||
#define SIZE 8
|
#define SIZE 8
|
||||||
#define BASE_SHIFT 3
|
#define BASE_SHIFT 3
|
||||||
#define ZBASE_SHIFT 4
|
#define ZBASE_SHIFT 4
|
||||||
|
#elif defined(INTEGER) //extend for integer matrix
|
||||||
|
#define FLOAT int
|
||||||
|
#define SIZE 4
|
||||||
|
#define BASE_SHIFT 2
|
||||||
|
#define ZBASE_SHIFT 3
|
||||||
#else
|
#else
|
||||||
#define FLOAT float
|
#define FLOAT float
|
||||||
#define SIZE 4
|
#define SIZE 4
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
#ifndef COMMON_I_H
#define COMMON_I_H

/*
 * Kernel-name mapping for the integer build.
 *
 * IAXPYU_K resolves directly to the iaxpy_k kernel symbol when the library
 * is built without DYNAMIC_ARCH.  No mapping is defined for DYNAMIC_ARCH
 * builds, so including this header in that configuration stops compilation
 * with an explicit diagnostic instead of a bare, message-less #error.
 */
#ifndef DYNAMIC_ARCH
#define IAXPYU_K iaxpy_k
#else
#error "common_i.h: no integer kernel mapping (IAXPYU_K) is defined for DYNAMIC_ARCH builds"
#endif

#endif /* COMMON_I_H */
|
|
@ -93,6 +93,7 @@ openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdo
|
||||||
|
|
||||||
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||||
void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||||
|
void BLASFUNC(iaxpy) (blasint *, int *, int *, blasint *, int *, blasint *);
|
||||||
void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *);
|
void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *);
|
||||||
void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||||
void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||||
|
|
|
@ -60,6 +60,8 @@ int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double,
|
||||||
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||||
int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
|
int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||||
|
int iaxpy_k (BLASLONG, BLASLONG, BLASLONG, int,
|
||||||
|
int *, BLASLONG, int *, BLASLONG, int *, BLASLONG);
|
||||||
int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float,
|
int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float,
|
||||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||||
int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double,
|
int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double,
|
||||||
|
|
|
@ -47,6 +47,10 @@
|
||||||
#include "common_z.h"
|
#include "common_z.h"
|
||||||
#include "common_x.h"
|
#include "common_x.h"
|
||||||
|
|
||||||
|
#ifdef INTEGER_PRECISION
|
||||||
|
#include "common_i.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
|
|
||||||
|
@ -635,6 +639,9 @@
|
||||||
#define OMATCOPY_K_CT DOMATCOPY_K_CT
|
#define OMATCOPY_K_CT DOMATCOPY_K_CT
|
||||||
#define OMATCOPY_K_RT DOMATCOPY_K_RT
|
#define OMATCOPY_K_RT DOMATCOPY_K_RT
|
||||||
#define GEADD_K DGEADD_K
|
#define GEADD_K DGEADD_K
|
||||||
|
|
||||||
|
#elif defined(INTEGER)
|
||||||
|
#define AXPYU_K IAXPYU_K
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#define AMAX_K SAMAX_K
|
#define AMAX_K SAMAX_K
|
||||||
|
|
|
@ -65,6 +65,7 @@ extern int blas_omp_linked;
|
||||||
#define BLAS_XDOUBLE 0x0002U
|
#define BLAS_XDOUBLE 0x0002U
|
||||||
#define BLAS_REAL 0x0000U
|
#define BLAS_REAL 0x0000U
|
||||||
#define BLAS_COMPLEX 0x0004U
|
#define BLAS_COMPLEX 0x0004U
|
||||||
|
#define BLAS_INTEGER 0x0008U
|
||||||
|
|
||||||
#define BLAS_TRANSA 0x0030U /* 2bit */
|
#define BLAS_TRANSA 0x0030U /* 2bit */
|
||||||
#define BLAS_TRANSA_N 0x0000U
|
#define BLAS_TRANSA_N 0x0000U
|
||||||
|
|
|
@ -189,6 +189,20 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
|
||||||
args -> b, args -> ldb,
|
args -> b, args -> ldb,
|
||||||
args -> c, args -> ldc, sb);
|
args -> c, args -> ldc, sb);
|
||||||
} else
|
} else
|
||||||
|
#endif
|
||||||
|
#ifdef INTEGER_PRECISION
|
||||||
|
if (mode & BLAS_INTEGER){
|
||||||
|
/* REAL / Integer */
|
||||||
|
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, int,
|
||||||
|
int *, BLASLONG, int *, BLASLONG,
|
||||||
|
int *, BLASLONG, void *) = func;
|
||||||
|
|
||||||
|
afunc(args -> m, args -> n, args -> k,
|
||||||
|
((int *)args -> alpha)[0],
|
||||||
|
args -> a, args -> lda,
|
||||||
|
args -> b, args -> ldb,
|
||||||
|
args -> c, args -> ldc, sb);
|
||||||
|
} else
|
||||||
#endif
|
#endif
|
||||||
if (mode & BLAS_DOUBLE){
|
if (mode & BLAS_DOUBLE){
|
||||||
/* REAL / Double */
|
/* REAL / Double */
|
||||||
|
|
|
@ -253,6 +253,15 @@ XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifdef INTEGER_PRECISION
|
||||||
|
|
||||||
|
IBLAS1OBJS = \
|
||||||
|
iaxpy.$(SUFFIX)
|
||||||
|
|
||||||
|
IBLAS2OBJS =
|
||||||
|
IBLAS3OBJS =
|
||||||
|
endif
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
HPLOBJS = dgemm.$(SUFFIX) dtrsm.$(SUFFIX) \
|
HPLOBJS = dgemm.$(SUFFIX) dtrsm.$(SUFFIX) \
|
||||||
|
@ -343,6 +352,9 @@ CZBLAS3OBJS = \
|
||||||
cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \
|
cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \
|
||||||
cblas_zgeadd.$(SUFFIX)
|
cblas_zgeadd.$(SUFFIX)
|
||||||
|
|
||||||
|
CIBLAS1OBJS = \
|
||||||
|
cblas_iaxpy.$(SUFFIX)
|
||||||
|
|
||||||
|
|
||||||
ifeq ($(SUPPORT_GEMM3M), 1)
|
ifeq ($(SUPPORT_GEMM3M), 1)
|
||||||
|
|
||||||
|
@ -372,6 +384,10 @@ ZBLAS1OBJS += $(CZBLAS1OBJS)
|
||||||
ZBLAS2OBJS += $(CZBLAS2OBJS)
|
ZBLAS2OBJS += $(CZBLAS2OBJS)
|
||||||
ZBLAS3OBJS += $(CZBLAS3OBJS)
|
ZBLAS3OBJS += $(CZBLAS3OBJS)
|
||||||
|
|
||||||
|
IBLAS1OBJS += $(CIBLAS1OBJS)
|
||||||
|
IBLAS2OBJS += $(CIBLAS2OBJS)
|
||||||
|
IBLAS3OBJS += $(CIBLAS3OBJS)
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
|
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
|
||||||
|
@ -380,6 +396,7 @@ QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS)
|
||||||
CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
|
CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
|
||||||
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
|
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
|
||||||
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
|
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
|
||||||
|
IBLASOBJS = $(IBLAS1OBJS) $(IBLAS2OBJS) $(IBLAS3OBJS)
|
||||||
|
|
||||||
#SLAPACKOBJS = \
|
#SLAPACKOBJS = \
|
||||||
# sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
|
# sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
|
||||||
|
@ -458,6 +475,10 @@ ifdef QUAD_PRECISION
|
||||||
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifdef INTEGER_PRECISION
|
||||||
|
FUNCOBJS += $(IBLASOBJS)
|
||||||
|
endif
|
||||||
|
|
||||||
FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=)
|
FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=)
|
||||||
|
|
||||||
include $(TOPDIR)/Makefile.tail
|
include $(TOPDIR)/Makefile.tail
|
||||||
|
@ -476,17 +497,18 @@ endif
|
||||||
clean ::
|
clean ::
|
||||||
@rm -f functable.h
|
@rm -f functable.h
|
||||||
|
|
||||||
level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS)
|
level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) $(IBLAS1OBJS)
|
||||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||||
|
|
||||||
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
|
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) $(IBLAS2OBJS)
|
||||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||||
|
|
||||||
level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS)
|
level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) $(IBLAS3OBJS)
|
||||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||||
|
|
||||||
$(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \
|
$(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \
|
||||||
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS
|
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) \
|
||||||
|
$(CIBLASOBJS) $(CIBLASOBJS_P) : override CFLAGS += -DCBLAS
|
||||||
|
|
||||||
srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c
|
srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c
|
||||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
@ -725,6 +747,9 @@ saxpy.$(SUFFIX) saxpy.$(PSUFFIX) : axpy.c
|
||||||
daxpy.$(SUFFIX) daxpy.$(PSUFFIX) : axpy.c
|
daxpy.$(SUFFIX) daxpy.$(PSUFFIX) : axpy.c
|
||||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
|
||||||
|
iaxpy.$(SUFFIX) iaxpy.$(PSUFFIX) : axpy.c
|
||||||
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
|
||||||
qaxpy.$(SUFFIX) qaxpy.$(PSUFFIX) : axpy.c
|
qaxpy.$(SUFFIX) qaxpy.$(PSUFFIX) : axpy.c
|
||||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
|
||||||
|
@ -1437,6 +1462,9 @@ cblas_saxpy.$(SUFFIX) cblas_saxpy.$(PSUFFIX) : axpy.c
|
||||||
cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c
|
cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c
|
||||||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
||||||
|
|
||||||
|
cblas_iaxpy.$(SUFFIX) cblas_iaxpy.$(PSUFFIX) : axpy.c
|
||||||
|
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
||||||
|
|
||||||
cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c
|
cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c
|
||||||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
||||||
|
|
||||||
|
|
|
@ -103,6 +103,8 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
|
||||||
mode = BLAS_XDOUBLE | BLAS_REAL;
|
mode = BLAS_XDOUBLE | BLAS_REAL;
|
||||||
#elif defined(DOUBLE)
|
#elif defined(DOUBLE)
|
||||||
mode = BLAS_DOUBLE | BLAS_REAL;
|
mode = BLAS_DOUBLE | BLAS_REAL;
|
||||||
|
#elif defined(INTEGER)
|
||||||
|
mode = BLAS_INTEGER | BLAS_REAL;
|
||||||
#else
|
#else
|
||||||
mode = BLAS_SINGLE | BLAS_REAL;
|
mode = BLAS_SINGLE | BLAS_REAL;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -210,6 +210,10 @@ ifndef XAXPYKERNEL
|
||||||
XAXPYKERNEL = zaxpy.S
|
XAXPYKERNEL = zaxpy.S
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifndef IAXPYKERNEL
|
||||||
|
IAXPYKERNEL = ../generic/iaxpy.c
|
||||||
|
endif
|
||||||
|
|
||||||
### COPY ###
|
### COPY ###
|
||||||
|
|
||||||
ifndef SCOPYKERNEL
|
ifndef SCOPYKERNEL
|
||||||
|
@ -471,6 +475,9 @@ QBLASOBJS += \
|
||||||
qasum_k$(TSUFFIX).$(SUFFIX) qaxpy_k$(TSUFFIX).$(SUFFIX) qcopy_k$(TSUFFIX).$(SUFFIX) qdot_k$(TSUFFIX).$(SUFFIX) \
|
qasum_k$(TSUFFIX).$(SUFFIX) qaxpy_k$(TSUFFIX).$(SUFFIX) qcopy_k$(TSUFFIX).$(SUFFIX) qdot_k$(TSUFFIX).$(SUFFIX) \
|
||||||
qnrm2_k$(TSUFFIX).$(SUFFIX) qrot_k$(TSUFFIX).$(SUFFIX) qscal_k$(TSUFFIX).$(SUFFIX) qswap_k$(TSUFFIX).$(SUFFIX)
|
qnrm2_k$(TSUFFIX).$(SUFFIX) qrot_k$(TSUFFIX).$(SUFFIX) qscal_k$(TSUFFIX).$(SUFFIX) qswap_k$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
IBLASOBJS += \
|
||||||
|
iaxpy_k$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
CBLASOBJS += \
|
CBLASOBJS += \
|
||||||
camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \
|
camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \
|
||||||
casum_k$(TSUFFIX).$(SUFFIX) caxpy_k$(TSUFFIX).$(SUFFIX) caxpyc_k$(TSUFFIX).$(SUFFIX) ccopy_k$(TSUFFIX).$(SUFFIX) \
|
casum_k$(TSUFFIX).$(SUFFIX) caxpy_k$(TSUFFIX).$(SUFFIX) caxpyc_k$(TSUFFIX).$(SUFFIX) ccopy_k$(TSUFFIX).$(SUFFIX) \
|
||||||
|
@ -645,6 +652,9 @@ $(KDIR)daxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)daxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KE
|
||||||
$(KDIR)qaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)qaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QAXPYKERNEL)
|
$(KDIR)qaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)qaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QAXPYKERNEL)
|
||||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@
|
$(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@
|
||||||
|
|
||||||
|
$(KDIR)iaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)iaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(IAXPYKERNEL)
|
||||||
|
$(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DINTEGER $< -o $@
|
||||||
|
|
||||||
$(KDIR)caxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)caxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CAXPYKERNEL)
|
$(KDIR)caxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)caxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CAXPYKERNEL)
|
||||||
$(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UCONJ -UDOUBLE $< -o $@
|
$(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UCONJ -UDOUBLE $< -o $@
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,52 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2015, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
/*
 * Generic integer AXPY kernel: for i in [0, n), y[i * inc_y] += da * x[i * inc_x].
 *
 * Parameters:
 *   n              number of elements to process; nothing is done when n <= 0
 *   dummy0, dummy1 unused
 *   da             scalar multiplier; when it is 0 the call returns without
 *                  touching y
 *   x, inc_x       source vector and the index step applied after each element
 *   y, inc_y       destination vector (updated in place) and its index step
 *   dummy, dummy2  unused
 *
 * Returns 0 in every case.
 *
 * Both indices start at 0 and are advanced by inc_x / inc_y, so the pointers
 * handed in must already address the first element to be visited.
 *
 * The multiply-add is carried out in unsigned int: unsigned arithmetic wraps
 * modulo 2^N, whereas overflowing the signed expression "y += da * x" would
 * be undefined behaviour.  Results that fit in int are unchanged; results
 * that do not fit are converted back to int (implementation-defined, two's
 * complement wrap-around on the usual compilers).
 */
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, int da, int *x, BLASLONG inc_x, int *y, BLASLONG inc_y, int *dummy, BLASLONG dummy2)
{
	BLASLONG i;
	BLASLONG ix = 0;
	BLASLONG iy = 0;
	unsigned int scale;

	/* Nothing to add for an empty vector or a zero multiplier. */
	if ( n <= 0 || da == 0 ) return 0;

	scale = (unsigned int)da;

	for (i = 0; i < n; i++)
	{
		y[iy] = (int)((unsigned int)y[iy] + scale * (unsigned int)x[ix]);
		ix += inc_x;
		iy += inc_y;
	}

	return 0;
}
|
Loading…
Reference in New Issue