diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 8b25344c0..f76d5c13f 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -12,6 +12,7 @@ set(BLAS1_REAL_ONLY_SOURCES rotm.c rotmg.c # N.B. these do not have complex counterparts rot.c asum.c + sum.c ) # these will have 'z' prepended for the complex version @@ -124,6 +125,7 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "scamin" ${CBLAS_FLAG} "" "" true "COMPLEX") GenerateNamedObjects("max.c" "USE_ABS" "scamax" ${CBLAS_FLAG} "" "" true "COMPLEX") GenerateNamedObjects("asum.c" "" "scasum" ${CBLAS_FLAG} "" "" true "COMPLEX") + GenerateNamedObjects("sum.c" "" "scsum" ${CBLAS_FLAG} "" "" true "COMPLEX") endif () if (${float_type} STREQUAL "ZCOMPLEX") GenerateNamedObjects("zscal.c" "SSCAL" "dscal" ${CBLAS_FLAG} "" "" false "ZCOMPLEX") @@ -132,6 +134,7 @@ foreach (float_type ${FLOAT_TYPES}) GenerateNamedObjects("max.c" "USE_ABS;USE_MIN" "dzamin" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") GenerateNamedObjects("max.c" "USE_ABS" "dzamax" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") GenerateNamedObjects("asum.c" "" "dzasum" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") + GenerateNamedObjects("sum.c" "" "dzsum" ${CBLAS_FLAG} "" "" true "ZCOMPLEX") endif () endforeach () diff --git a/interface/Makefile b/interface/Makefile index 2b996c7de..f0577796d 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -25,7 +25,7 @@ SBLAS1OBJS = \ saxpy.$(SUFFIX) sswap.$(SUFFIX) \ scopy.$(SUFFIX) sscal.$(SUFFIX) \ sdot.$(SUFFIX) sdsdot.$(SUFFIX) dsdot.$(SUFFIX) \ - sasum.$(SUFFIX) snrm2.$(SUFFIX) \ + sasum.$(SUFFIX) ssum.$(SUFFIX) snrm2.$(SUFFIX) \ smax.$(SUFFIX) samax.$(SUFFIX) ismax.$(SUFFIX) isamax.$(SUFFIX) \ smin.$(SUFFIX) samin.$(SUFFIX) ismin.$(SUFFIX) isamin.$(SUFFIX) \ srot.$(SUFFIX) srotg.$(SUFFIX) srotm.$(SUFFIX) srotmg.$(SUFFIX) \ @@ -51,7 +51,7 @@ DBLAS1OBJS = \ daxpy.$(SUFFIX) dswap.$(SUFFIX) \ dcopy.$(SUFFIX) dscal.$(SUFFIX) \ ddot.$(SUFFIX) \ - dasum.$(SUFFIX) 
dnrm2.$(SUFFIX) \ + dasum.$(SUFFIX) dsum.$(SUFFIX) dnrm2.$(SUFFIX) \ dmax.$(SUFFIX) damax.$(SUFFIX) idmax.$(SUFFIX) idamax.$(SUFFIX) \ dmin.$(SUFFIX) damin.$(SUFFIX) idmin.$(SUFFIX) idamin.$(SUFFIX) \ drot.$(SUFFIX) drotg.$(SUFFIX) drotm.$(SUFFIX) drotmg.$(SUFFIX) \ @@ -76,7 +76,7 @@ CBLAS1OBJS = \ caxpy.$(SUFFIX) caxpyc.$(SUFFIX) cswap.$(SUFFIX) \ ccopy.$(SUFFIX) cscal.$(SUFFIX) csscal.$(SUFFIX) \ cdotc.$(SUFFIX) cdotu.$(SUFFIX) \ - scasum.$(SUFFIX) scnrm2.$(SUFFIX) \ + scasum.$(SUFFIX) scsum.$(SUFFIX) scnrm2.$(SUFFIX) \ scamax.$(SUFFIX) icamax.$(SUFFIX) \ scamin.$(SUFFIX) icamin.$(SUFFIX) \ csrot.$(SUFFIX) crotg.$(SUFFIX) \ @@ -105,7 +105,7 @@ ZBLAS1OBJS = \ zaxpy.$(SUFFIX) zaxpyc.$(SUFFIX) zswap.$(SUFFIX) \ zcopy.$(SUFFIX) zscal.$(SUFFIX) zdscal.$(SUFFIX) \ zdotc.$(SUFFIX) zdotu.$(SUFFIX) \ - dzasum.$(SUFFIX) dznrm2.$(SUFFIX) \ + dzasum.$(SUFFIX) dzsum.$(SUFFIX) dznrm2.$(SUFFIX) \ dzamax.$(SUFFIX) izamax.$(SUFFIX) \ dzamin.$(SUFFIX) izamin.$(SUFFIX) \ zdrot.$(SUFFIX) zrotg.$(SUFFIX) \ @@ -146,7 +146,7 @@ QBLAS1OBJS = \ qaxpy.$(SUFFIX) qswap.$(SUFFIX) \ qcopy.$(SUFFIX) qscal.$(SUFFIX) \ qdot.$(SUFFIX) \ - qasum.$(SUFFIX) qnrm2.$(SUFFIX) \ + qasum.$(SUFFIX) qsum.$(SUFFIX) qnrm2.$(SUFFIX) \ qmax.$(SUFFIX) qamax.$(SUFFIX) iqmax.$(SUFFIX) iqamax.$(SUFFIX) \ qmin.$(SUFFIX) qamin.$(SUFFIX) iqmin.$(SUFFIX) iqamin.$(SUFFIX) \ qrot.$(SUFFIX) qrotg.$(SUFFIX) qrotm.$(SUFFIX) qrotmg.$(SUFFIX) \ @@ -168,7 +168,7 @@ XBLAS1OBJS = \ xaxpy.$(SUFFIX) xaxpyc.$(SUFFIX) xswap.$(SUFFIX) \ xcopy.$(SUFFIX) xscal.$(SUFFIX) xqscal.$(SUFFIX) \ xdotc.$(SUFFIX) xdotu.$(SUFFIX) \ - qxasum.$(SUFFIX) qxnrm2.$(SUFFIX) \ + qxasum.$(SUFFIX) qxsum.$(SUFFIX) qxnrm2.$(SUFFIX) \ qxamax.$(SUFFIX) ixamax.$(SUFFIX) \ qxamin.$(SUFFIX) ixamin.$(SUFFIX) \ xqrot.$(SUFFIX) xrotg.$(SUFFIX) \ @@ -203,7 +203,7 @@ ifdef QUAD_PRECISION QBLAS1OBJS = \ qaxpy.$(SUFFIX) qswap.$(SUFFIX) \ qcopy.$(SUFFIX) qscal.$(SUFFIX) \ - qasum.$(SUFFIX) qnrm2.$(SUFFIX) \ + qasum.$(SUFFIX) qsum.$(SUFFIX) qnrm2.$(SUFFIX) \ 
qmax.$(SUFFIX) qamax.$(SUFFIX) iqmax.$(SUFFIX) iqamax.$(SUFFIX) \ qmin.$(SUFFIX) qamin.$(SUFFIX) iqmin.$(SUFFIX) iqamin.$(SUFFIX) \ qrot.$(SUFFIX) qrotg.$(SUFFIX) qrotm.$(SUFFIX) qrotmg.$(SUFFIX) \ @@ -224,7 +224,7 @@ QBLAS3OBJS = \ XBLAS1OBJS = \ xaxpy.$(SUFFIX) xaxpyc.$(SUFFIX) xswap.$(SUFFIX) \ xcopy.$(SUFFIX) xscal.$(SUFFIX) xqscal.$(SUFFIX) \ - qxasum.$(SUFFIX) qxnrm2.$(SUFFIX) \ + qxasum.$(SUFFIX) qxsum.$(SUFFIX) qxnrm2.$(SUFFIX) \ qxamax.$(SUFFIX) ixamax.$(SUFFIX) \ qxamin.$(SUFFIX) ixamin.$(SUFFIX) \ xqrot.$(SUFFIX) xrotg.$(SUFFIX) \ @@ -264,7 +264,7 @@ CSBLAS1OBJS = \ cblas_scopy.$(SUFFIX) cblas_sdot.$(SUFFIX) cblas_sdsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) \ cblas_srot.$(SUFFIX) cblas_srotg.$(SUFFIX) cblas_srotm.$(SUFFIX) cblas_srotmg.$(SUFFIX) \ cblas_sscal.$(SUFFIX) cblas_sswap.$(SUFFIX) cblas_snrm2.$(SUFFIX) cblas_saxpby.$(SUFFIX) \ - cblas_ismin.$(SUFFIX) cblas_ismax.$(SUFFIX) + cblas_ismin.$(SUFFIX) cblas_ismax.$(SUFFIX) cblas_ssum.$(SUFFIX) CSBLAS2OBJS = \ cblas_sgemv.$(SUFFIX) cblas_sger.$(SUFFIX) cblas_ssymv.$(SUFFIX) cblas_strmv.$(SUFFIX) \ @@ -282,7 +282,7 @@ CDBLAS1OBJS = \ cblas_dcopy.$(SUFFIX) cblas_ddot.$(SUFFIX) \ cblas_drot.$(SUFFIX) cblas_drotg.$(SUFFIX) cblas_drotm.$(SUFFIX) cblas_drotmg.$(SUFFIX) \ cblas_dscal.$(SUFFIX) cblas_dswap.$(SUFFIX) cblas_dnrm2.$(SUFFIX) cblas_daxpby.$(SUFFIX) \ - cblas_idmin.$(SUFFIX) cblas_idmax.$(SUFFIX) + cblas_idmin.$(SUFFIX) cblas_idmax.$(SUFFIX) cblas_dsum.$(SUFFIX) CDBLAS2OBJS = \ cblas_dgemv.$(SUFFIX) cblas_dger.$(SUFFIX) cblas_dsymv.$(SUFFIX) cblas_dtrmv.$(SUFFIX) \ @@ -303,7 +303,7 @@ CCBLAS1OBJS = \ cblas_cscal.$(SUFFIX) cblas_csscal.$(SUFFIX) \ cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX) \ cblas_caxpby.$(SUFFIX) \ - cblas_icmin.$(SUFFIX) cblas_icmax.$(SUFFIX) + cblas_icmin.$(SUFFIX) cblas_icmax.$(SUFFIX) cblas_scsum.$(SUFFIX) CCBLAS2OBJS = \ cblas_cgemv.$(SUFFIX) cblas_cgerc.$(SUFFIX) cblas_cgeru.$(SUFFIX) \ @@ -330,7 +330,7 @@ CZBLAS1OBJS = \ cblas_zscal.$(SUFFIX) cblas_zdscal.$(SUFFIX) \ 
cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX) \ cblas_zaxpby.$(SUFFIX) \ - cblas_izmin.$(SUFFIX) cblas_izmax.$(SUFFIX) + cblas_izmin.$(SUFFIX) cblas_izmax.$(SUFFIX) cblas_dzsum.$(SUFFIX) CZBLAS2OBJS = \ @@ -565,6 +565,24 @@ dzasum.$(SUFFIX) dzasum.$(PSUFFIX) : asum.c qxasum.$(SUFFIX) qxasum.$(PSUFFIX) : asum.c $(CC) $(CFLAGS) -c $< -o $(@F) +ssum.$(SUFFIX) ssum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dsum.$(SUFFIX) dsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qsum.$(SUFFIX) qsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +scsum.$(SUFFIX) scsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +dzsum.$(SUFFIX) dzsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + +qxsum.$(SUFFIX) qxsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -c $< -o $(@F) + snrm2.$(SUFFIX) snrm2.$(PSUFFIX) : nrm2.c $(CC) $(CFLAGS) -c $< -o $(@F) @@ -1412,6 +1430,18 @@ cblas_scasum.$(SUFFIX) cblas_scasum.$(PSUFFIX) : asum.c cblas_dzasum.$(SUFFIX) cblas_dzasum.$(PSUFFIX) : asum.c $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) +cblas_ssum.$(SUFFIX) cblas_ssum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dsum.$(SUFFIX) cblas_dsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_scsum.$(SUFFIX) cblas_scsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + +cblas_dzsum.$(SUFFIX) cblas_dzsum.$(PSUFFIX) : sum.c + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + cblas_sdsdot.$(SUFFIX) cblas_sdsdot.$(PSUFFIX) : sdsdot.c $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) @@ -1419,7 +1449,7 @@ cblas_dsdot.$(SUFFIX) cblas_dsdot.$(PSUFFIX) : dsdot.c $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) cblas_sdot.$(SUFFIX) cblas_sdot.$(PSUFFIX) : dot.c - $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) + $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) cblas_ddot.$(SUFFIX) cblas_ddot.$(PSUFFIX) : dot.c $(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) diff --git a/interface/sum.c b/interface/sum.c new file mode 100644 index 000000000..dfdcc5dcc --- /dev/null +++ b/interface/sum.c @@ 
-0,0 +1,97 @@ +/*********************************************************************/ +/* Copyright 2009, 2010 The University of Texas at Austin. */ +/* All rights reserved. */ +/* */ +/* Redistribution and use in source and binary forms, with or */ +/* without modification, are permitted provided that the following */ +/* conditions are met: */ +/* */ +/* 1. Redistributions of source code must retain the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer. */ +/* */ +/* 2. Redistributions in binary form must reproduce the above */ +/* copyright notice, this list of conditions and the following */ +/* disclaimer in the documentation and/or other materials */ +/* provided with the distribution. */ +/* */ +/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ +/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ +/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ +/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ +/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ +/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ +/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ +/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ +/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ +/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ +/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ +/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ +/* POSSIBILITY OF SUCH DAMAGE. */ +/* */ +/* The views and conclusions contained in the software and */ +/* documentation are those of the authors and should not be */ +/* interpreted as representing official policies, either expressed */ +/* or implied, of The University of Texas at Austin. 
*/
+/*********************************************************************/
+
+#include <stdio.h>
+#include "common.h"
+#ifdef FUNCTION_PROFILE
+#include "functable.h"
+#endif
+
+#ifndef CBLAS
+/* Fortran-style entry point: N and INCX arrive by reference and are copied into locals. */
+FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){
+
+  BLASLONG n    = *N;
+  BLASLONG incx = *INCX;
+  FLOATRET ret;
+
+  PRINT_DEBUG_NAME;
+  /* A zero or negative element count returns 0 without calling the kernel. */
+  if (n <= 0) return 0;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+  /* SUM_K is defined outside this file; its result is converted to FLOATRET for the caller. */
+  ret = (FLOATRET)SUM_K(n, x, incx);
+
+  FUNCTION_PROFILE_END(COMPSIZE, n, n);
+
+  IDEBUG_END;
+
+  return ret;
+}
+
+#else /* CBLAS: n and incx are passed by value */
+#ifdef COMPLEX
+FLOAT CNAME(blasint n, void *vx, blasint incx){
+  FLOAT *x = (FLOAT*) vx;
+#else
+FLOAT CNAME(blasint n, FLOAT *x, blasint incx){
+#endif
+  /* COMPLEX builds take the vector as void* and cast it to FLOAT* above; other builds take FLOAT* directly. */
+  FLOAT ret;
+
+  PRINT_DEBUG_CNAME;
+  /* A zero or negative element count returns 0 without calling the kernel. */
+  if (n <= 0) return 0;
+
+  IDEBUG_START;
+
+  FUNCTION_PROFILE_START();
+
+  ret = SUM_K(n, x, incx);
+
+  FUNCTION_PROFILE_END(COMPSIZE, n, n);
+
+  IDEBUG_END;
+
+  return ret;
+}
+
+#endif /* CBLAS */