Ref #285: added axpby kernels
This commit is contained in:
@@ -432,18 +432,38 @@ ifndef LSAME_KERNEL
|
||||
LSAME_KERNEL = lsame.S
|
||||
endif
|
||||
|
||||
### AXPBY ###
|
||||
|
||||
ifndef SAXPBYKERNEL
|
||||
SAXPBYKERNEL = ../arm/axpby.c
|
||||
endif
|
||||
|
||||
ifndef DAXPBYKERNEL
|
||||
DAXPBYKERNEL = ../arm/axpby.c
|
||||
endif
|
||||
|
||||
ifndef CAXPBYKERNEL
|
||||
CAXPBYKERNEL = ../arm/zaxpby.c
|
||||
endif
|
||||
|
||||
ifndef ZAXPBYKERNEL
|
||||
ZAXPBYKERNEL = ../arm/zaxpby.c
|
||||
endif
|
||||
|
||||
SBLASOBJS += \
|
||||
samax_k$(TSUFFIX).$(SUFFIX) samin_k$(TSUFFIX).$(SUFFIX) smax_k$(TSUFFIX).$(SUFFIX) smin_k$(TSUFFIX).$(SUFFIX) \
|
||||
isamax_k$(TSUFFIX).$(SUFFIX) isamin_k$(TSUFFIX).$(SUFFIX) ismax_k$(TSUFFIX).$(SUFFIX) ismin_k$(TSUFFIX).$(SUFFIX) \
|
||||
sasum_k$(TSUFFIX).$(SUFFIX) saxpy_k$(TSUFFIX).$(SUFFIX) scopy_k$(TSUFFIX).$(SUFFIX) \
|
||||
sdot_k$(TSUFFIX).$(SUFFIX) sdsdot_k$(TSUFFIX).$(SUFFIX) dsdot_k$(TSUFFIX).$(SUFFIX) \
|
||||
snrm2_k$(TSUFFIX).$(SUFFIX) srot_k$(TSUFFIX).$(SUFFIX) sscal_k$(TSUFFIX).$(SUFFIX) sswap_k$(TSUFFIX).$(SUFFIX)
|
||||
snrm2_k$(TSUFFIX).$(SUFFIX) srot_k$(TSUFFIX).$(SUFFIX) sscal_k$(TSUFFIX).$(SUFFIX) sswap_k$(TSUFFIX).$(SUFFIX) \
|
||||
saxpby_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DBLASOBJS += \
|
||||
damax_k$(TSUFFIX).$(SUFFIX) damin_k$(TSUFFIX).$(SUFFIX) dmax_k$(TSUFFIX).$(SUFFIX) dmin_k$(TSUFFIX).$(SUFFIX) \
|
||||
idamax_k$(TSUFFIX).$(SUFFIX) idamin_k$(TSUFFIX).$(SUFFIX) idmax_k$(TSUFFIX).$(SUFFIX) idmin_k$(TSUFFIX).$(SUFFIX) \
|
||||
dasum_k$(TSUFFIX).$(SUFFIX) daxpy_k$(TSUFFIX).$(SUFFIX) dcopy_k$(TSUFFIX).$(SUFFIX) ddot_k$(TSUFFIX).$(SUFFIX) \
|
||||
dnrm2_k$(TSUFFIX).$(SUFFIX) drot_k$(TSUFFIX).$(SUFFIX) dscal_k$(TSUFFIX).$(SUFFIX) dswap_k$(TSUFFIX).$(SUFFIX)
|
||||
dnrm2_k$(TSUFFIX).$(SUFFIX) drot_k$(TSUFFIX).$(SUFFIX) dscal_k$(TSUFFIX).$(SUFFIX) dswap_k$(TSUFFIX).$(SUFFIX) \
|
||||
daxpby_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
QBLASOBJS += \
|
||||
qamax_k$(TSUFFIX).$(SUFFIX) qamin_k$(TSUFFIX).$(SUFFIX) qmax_k$(TSUFFIX).$(SUFFIX) qmin_k$(TSUFFIX).$(SUFFIX) \
|
||||
@@ -455,13 +475,13 @@ CBLASOBJS += \
|
||||
camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \
|
||||
casum_k$(TSUFFIX).$(SUFFIX) caxpy_k$(TSUFFIX).$(SUFFIX) caxpyc_k$(TSUFFIX).$(SUFFIX) ccopy_k$(TSUFFIX).$(SUFFIX) \
|
||||
cdotc_k$(TSUFFIX).$(SUFFIX) cdotu_k$(TSUFFIX).$(SUFFIX) cnrm2_k$(TSUFFIX).$(SUFFIX) csrot_k$(TSUFFIX).$(SUFFIX) \
|
||||
cscal_k$(TSUFFIX).$(SUFFIX) cswap_k$(TSUFFIX).$(SUFFIX)
|
||||
cscal_k$(TSUFFIX).$(SUFFIX) cswap_k$(TSUFFIX).$(SUFFIX) caxpby_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZBLASOBJS += \
|
||||
zamax_k$(TSUFFIX).$(SUFFIX) zamin_k$(TSUFFIX).$(SUFFIX) izamax_k$(TSUFFIX).$(SUFFIX) izamin_k$(TSUFFIX).$(SUFFIX) \
|
||||
zasum_k$(TSUFFIX).$(SUFFIX) zaxpy_k$(TSUFFIX).$(SUFFIX) zaxpyc_k$(TSUFFIX).$(SUFFIX) zcopy_k$(TSUFFIX).$(SUFFIX) \
|
||||
zdotc_k$(TSUFFIX).$(SUFFIX) zdotu_k$(TSUFFIX).$(SUFFIX) znrm2_k$(TSUFFIX).$(SUFFIX) zdrot_k$(TSUFFIX).$(SUFFIX) \
|
||||
zscal_k$(TSUFFIX).$(SUFFIX) zswap_k$(TSUFFIX).$(SUFFIX)
|
||||
zscal_k$(TSUFFIX).$(SUFFIX) zswap_k$(TSUFFIX).$(SUFFIX) zaxpby_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
XBLASOBJS += \
|
||||
xamax_k$(TSUFFIX).$(SUFFIX) xamin_k$(TSUFFIX).$(SUFFIX) ixamax_k$(TSUFFIX).$(SUFFIX) ixamin_k$(TSUFFIX).$(SUFFIX) \
|
||||
@@ -765,3 +785,17 @@ $(KDIR)zswap_k$(TSUFFIX).$(SUFFIX) $(KDIR)zswap_k$(TPSUFFIX).$(PSUFFIX) : $(KE
|
||||
|
||||
$(KDIR)xswap_k$(TSUFFIX).$(SUFFIX) $(KDIR)xswap_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSWAPKERNEL)
|
||||
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE $< -o $@
|
||||
|
||||
$(KDIR)saxpby_k$(TSUFFIX).$(SUFFIX) $(KDIR)saxpby_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SAXPBYKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -UDOUBLE $< -o $@
|
||||
|
||||
$(KDIR)daxpby_k$(TSUFFIX).$(SUFFIX) $(KDIR)daxpby_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DAXPBYKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@
|
||||
|
||||
$(KDIR)caxpby_k$(TSUFFIX).$(SUFFIX) $(KDIR)caxpby_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CAXPBYKERNEL)
|
||||
$(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UCONJ -UDOUBLE $< -o $@
|
||||
|
||||
$(KDIR)zaxpby_k$(TSUFFIX).$(SUFFIX) $(KDIR)zaxpby_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZAXPBYKERNEL)
|
||||
$(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UCONJ -DDOUBLE $< -o $@
|
||||
|
||||
|
||||
|
||||
96
kernel/arm/axpby.c
Normal file
96
kernel/arm/axpby.c
Normal file
@@ -0,0 +1,96 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix,iy;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
|
||||
ix = 0;
|
||||
iy = 0;
|
||||
|
||||
if ( beta == 0.0 )
|
||||
{
|
||||
|
||||
if ( alpha == 0.0 )
|
||||
{
|
||||
while(i < n)
|
||||
{
|
||||
y[iy] = 0.0 ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while(i < n)
|
||||
{
|
||||
y[iy] = alpha * x[ix] ;
|
||||
ix += inc_x ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
if ( alpha == 0.0 )
|
||||
{
|
||||
while(i < n)
|
||||
{
|
||||
y[iy] = beta * y[iy] ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while(i < n)
|
||||
{
|
||||
y[iy] = alpha * x[ix] + beta * y[iy] ;
|
||||
ix += inc_x ;
|
||||
iy += inc_y ;
|
||||
i++ ;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
117
kernel/arm/zaxpby.c
Normal file
117
kernel/arm/zaxpby.c
Normal file
@@ -0,0 +1,117 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
/***************************************************************************
|
||||
* 2014/06/07 Saar
|
||||
*
|
||||
***************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FLOAT beta_r, FLOAT beta_i,FLOAT *y, BLASLONG inc_y)
|
||||
{
|
||||
BLASLONG i=0;
|
||||
BLASLONG ix,iy;
|
||||
FLOAT temp;
|
||||
|
||||
if ( n < 0 ) return(0);
|
||||
|
||||
ix = 0;
|
||||
iy = 0;
|
||||
|
||||
BLASLONG inc_x2 = 2 * inc_x;
|
||||
BLASLONG inc_y2 = 2 * inc_y;
|
||||
|
||||
if ( beta_r == 0.0 && beta_i == 0.0)
|
||||
{
|
||||
if ( alpha_r == 0.0 && alpha_i == 0.0 )
|
||||
{
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
y[iy] = 0.0 ;
|
||||
y[iy+1] = 0.0 ;
|
||||
iy += inc_y2 ;
|
||||
i++ ;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
y[iy] = ( alpha_r * x[ix] - alpha_i * x[ix+1] ) ;
|
||||
y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix] ) ;
|
||||
ix += inc_x2 ;
|
||||
iy += inc_y2 ;
|
||||
i++ ;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( alpha_r == 0.0 && alpha_i == 0.0 )
|
||||
{
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
temp = ( beta_r * y[iy] - beta_i * y[iy+1] ) ;
|
||||
y[iy+1] = ( beta_r * y[iy+1] + beta_i * y[iy] ) ;
|
||||
y[iy] = temp;
|
||||
iy += inc_y2 ;
|
||||
i++ ;
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
while(i < n)
|
||||
{
|
||||
temp = ( alpha_r * x[ix] - alpha_i * x[ix+1] ) + ( beta_r * y[iy] - beta_i * y[iy+1] ) ;
|
||||
y[iy+1] = ( alpha_r * x[ix+1] + alpha_i * x[ix] ) + ( beta_r * y[iy+1] + beta_i * y[iy] ) ;
|
||||
y[iy] = temp;
|
||||
ix += inc_x2 ;
|
||||
iy += inc_y2 ;
|
||||
i++ ;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
||||
return(0);
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -500,6 +500,12 @@ gotoblas_t TABLE_NAME = {
|
||||
|
||||
SNUMOPT, DNUMOPT, QNUMOPT,
|
||||
|
||||
saxpby_kTS,
|
||||
daxpby_kTS,
|
||||
caxpby_kTS,
|
||||
zaxpby_kTS
|
||||
|
||||
|
||||
};
|
||||
|
||||
#ifdef ARCH_X86
|
||||
|
||||
Reference in New Issue
Block a user