Merge pull request #4479 from XiWeiGu/loongarch-opt-axpby
Loongarch opt axpby
This commit is contained in:
@@ -58,6 +58,8 @@ ZAXPYKERNEL = caxpy_lsx.S
|
||||
|
||||
SAXPBYKERNEL = axpby_lsx.S
|
||||
DAXPBYKERNEL = axpby_lsx.S
|
||||
CAXPBYKERNEL = caxpby_lsx.S
|
||||
ZAXPBYKERNEL = caxpby_lsx.S
|
||||
|
||||
SSUMKERNEL = sum_lsx.S
|
||||
DSUMKERNEL = sum_lsx.S
|
||||
|
||||
@@ -58,6 +58,8 @@ ZAXPYKERNEL = caxpy_lasx.S
|
||||
|
||||
SAXPBYKERNEL = axpby_lasx.S
|
||||
DAXPBYKERNEL = axpby_lasx.S
|
||||
CAXPBYKERNEL = caxpby_lasx.S
|
||||
ZAXPBYKERNEL = caxpby_lasx.S
|
||||
|
||||
SSUMKERNEL = sum_lasx.S
|
||||
DSUMKERNEL = sum_lasx.S
|
||||
|
||||
@@ -57,10 +57,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
PROLOGUE
|
||||
|
||||
bge $r0, N, .L999
|
||||
li.d TEMP, 1
|
||||
movgr2fr.d a1, $r0
|
||||
ffint.s.l a1, a1
|
||||
slli.d TEMP, TEMP, BASE_SHIFT
|
||||
slli.d INCX, INCX, BASE_SHIFT
|
||||
slli.d INCY, INCY, BASE_SHIFT
|
||||
MTG t1, ALPHA
|
||||
@@ -75,6 +73,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
xvreplgr2vr.w VXB, t2
|
||||
xvreplgr2vr.w VXZ, t3
|
||||
#endif
|
||||
// If incx == 0 || incy == 0, do one by one
|
||||
and TEMP, INCX, INCY
|
||||
or I, N, N
|
||||
beqz TEMP, .L998
|
||||
|
||||
li.d TEMP, 1
|
||||
slli.d TEMP, TEMP, BASE_SHIFT
|
||||
srai.d I, N, 3
|
||||
bne INCX, TEMP, .L20
|
||||
bne INCY, TEMP, .L12 // INCX==1 and INCY!=1
|
||||
|
||||
@@ -57,10 +57,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
PROLOGUE
|
||||
|
||||
bge $r0, N, .L999
|
||||
li.d TEMP, 1
|
||||
movgr2fr.d a1, $r0
|
||||
ffint.s.l a1, a1
|
||||
slli.d TEMP, TEMP, BASE_SHIFT
|
||||
slli.d INCX, INCX, BASE_SHIFT
|
||||
slli.d INCY, INCY, BASE_SHIFT
|
||||
MTG t1, ALPHA
|
||||
@@ -75,6 +73,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
vreplgr2vr.w VXB, t2
|
||||
vreplgr2vr.w VXZ, t3
|
||||
#endif
|
||||
// If incx == 0 || incy == 0, do one by one
|
||||
and TEMP, INCX, INCY
|
||||
or I, N, N
|
||||
beqz TEMP, .L998
|
||||
|
||||
li.d TEMP, 1
|
||||
slli.d TEMP, TEMP, BASE_SHIFT
|
||||
srai.d I, N, 3
|
||||
bne INCX, TEMP, .L20
|
||||
bne INCY, TEMP, .L12 // INCX==1 and INCY!=1
|
||||
|
||||
1046
kernel/loongarch64/caxpby_lasx.S
Normal file
1046
kernel/loongarch64/caxpby_lasx.S
Normal file
File diff suppressed because it is too large
Load Diff
1029
kernel/loongarch64/caxpby_lsx.S
Normal file
1029
kernel/loongarch64/caxpby_lsx.S
Normal file
File diff suppressed because it is too large
Load Diff
@@ -17,6 +17,7 @@ else ()
|
||||
test_swap.c
|
||||
test_zscal.c
|
||||
test_amin.c
|
||||
test_axpby.c
|
||||
)
|
||||
endif ()
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ UTESTBIN=openblas_utest
|
||||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
OBJS=utest_main.o test_min.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o test_dnrm2.o test_zscal.o \
|
||||
test_amin.o
|
||||
test_amin.o test_axpby.o
|
||||
#test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o
|
||||
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
|
||||
320
utest/test_axpby.c
Normal file
320
utest/test_axpby.c
Normal file
@@ -0,0 +1,320 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011-2024, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
#include "openblas_utest.h"
|
||||
|
||||
#ifdef BUILD_SINGLE
|
||||
CTEST(axpby, saxpby_inc_0)
|
||||
{
|
||||
blasint i;
|
||||
blasint N = 9, incX = 0, incY = 0;
|
||||
float alpha = 1.0, beta = 2.0;
|
||||
float x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
float y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
BLASFUNC(saxpby)(&N, &alpha, x1, &incX, &beta, y1, &incY);
|
||||
|
||||
float x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
float y2[] = { 1535.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
for(i = 0; i < N; i++){
|
||||
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], SINGLE_EPS);
|
||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], SINGLE_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
CTEST(axpby, saxpby_inc_1)
|
||||
{
|
||||
blasint i;
|
||||
blasint N = 9, incX = 1, incY = 1;
|
||||
float alpha = 0.25, beta = 0.75;
|
||||
float x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
float y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
BLASFUNC(saxpby)(&N, &alpha, x1, &incX, &beta, y1, &incY);
|
||||
|
||||
float x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
float y2[] = { 1.75, 3.75, 5.75, 7.75, 1.75, 3.75, 5.75, 7.75, 9.75 };
|
||||
|
||||
for(i = 0; i < N; i++){
|
||||
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], SINGLE_EPS);
|
||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], SINGLE_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
CTEST(axpby, saxpby_inc_2)
|
||||
{
|
||||
blasint i;
|
||||
blasint N = 9, incX = 2, incY = 2;
|
||||
float alpha = 0.25, beta = 0.75;
|
||||
float x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
float y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0,
|
||||
2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
BLASFUNC(saxpby)(&N, &alpha, x1, &incX, &beta, y1, &incY);
|
||||
|
||||
float x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
float y2[] = { 1.75, 4.00, 5.75, 8.00, 1.75, 4.00, 5.75, 8.00,
|
||||
9.75, 2.00, 3.75, 6.00, 7.75, 2.00, 3.75, 6.00,
|
||||
7.75, 10.00 };
|
||||
|
||||
for(i = 0; i < 2 * N; i++){
|
||||
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], SINGLE_EPS);
|
||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], SINGLE_EPS);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_DOUBLE
|
||||
CTEST(axpby, daxpby_inc_0)
|
||||
{
|
||||
blasint i;
|
||||
blasint N = 9, incX = 0, incY = 0;
|
||||
double alpha = 1.0, beta = 2.0;
|
||||
double x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
double y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
BLASFUNC(daxpby)(&N, &alpha, x1, &incX, &beta, y1, &incY);
|
||||
|
||||
double x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
double y2[] = { 1535.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
for(i = 0; i < N; i++){
|
||||
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], DOUBLE_EPS);
|
||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
CTEST(axpby, daxpby_inc_1)
|
||||
{
|
||||
blasint i;
|
||||
blasint N = 9, incX = 1, incY = 1;
|
||||
double alpha = 0.25, beta = 0.75;
|
||||
double x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
double y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
BLASFUNC(daxpby)(&N, &alpha, x1, &incX, &beta, y1, &incY);
|
||||
|
||||
double x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
double y2[] = { 1.75, 3.75, 5.75, 7.75, 1.75, 3.75, 5.75, 7.75, 9.75 };
|
||||
|
||||
for(i = 0; i < N; i++){
|
||||
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], DOUBLE_EPS);
|
||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
CTEST(axpby, daxpby_inc_2)
|
||||
{
|
||||
blasint i;
|
||||
blasint N = 9, incX = 2, incY = 2;
|
||||
double alpha = 0.25, beta = 0.75;
|
||||
double x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
double y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0,
|
||||
2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
BLASFUNC(daxpby)(&N, &alpha, x1, &incX, &beta, y1, &incY);
|
||||
|
||||
double x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
double y2[] = { 1.75, 4.00, 5.75, 8.00, 1.75, 4.00, 5.75, 8.00,
|
||||
9.75, 2.00, 3.75, 6.00, 7.75, 2.00, 3.75, 6.00,
|
||||
7.75, 10.00 };
|
||||
|
||||
for(i = 0; i < 2 * N; i++){
|
||||
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], DOUBLE_EPS);
|
||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_COMPLEX
|
||||
CTEST(axpby, caxpby_inc_0)
|
||||
{
|
||||
blasint i;
|
||||
blasint N = 9, incX = 0, incY = 0;
|
||||
float alpha[] = { 1.0, 2.0 }, beta[] = { 2.0, 1.0 };
|
||||
float x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
float y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0,
|
||||
2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
BLASFUNC(caxpby)(&N, alpha, x1, &incX, beta, y1, &incY);
|
||||
|
||||
float x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
float y2[] = { 9355.0, -8865.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0,
|
||||
10.0, 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
for(i = 0; i < 2 * N; i++){
|
||||
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], SINGLE_EPS);
|
||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], SINGLE_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
CTEST(axpby, caxpby_inc_1)
|
||||
{
|
||||
blasint i;
|
||||
blasint N = 9, incX = 1, incY = 1;
|
||||
float alpha[] = { 0.25, 0.25 }, beta[] = { 0.75, 0.75 };
|
||||
float x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
float y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0,
|
||||
2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
BLASFUNC(caxpby)(&N, alpha, x1, &incX, beta, y1, &incY);
|
||||
|
||||
float x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
float y2[] = { -2.0, 5.5, -2.0, 13.5, -2.0, 5.5, -2.0, 13.5,
|
||||
8.0, 11.5, -2.0, 9.5, 6.0, 9.5, -2.0, 9.5, -2.0, 17.5 };
|
||||
|
||||
for(i = 0; i < 2 * N; i++){
|
||||
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], SINGLE_EPS);
|
||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], SINGLE_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
CTEST(axpby, caxpby_inc_2)
|
||||
{
|
||||
blasint i;
|
||||
blasint N = 9, incX = 2, incY = 2;
|
||||
float alpha[] = { 0.25, 0.25 }, beta[] = { 0.75, 0.75 };
|
||||
float x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
float y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0,
|
||||
2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0,
|
||||
2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0,
|
||||
2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
BLASFUNC(caxpby)(&N, &alpha, x1, &incX, &beta, y1, &incY);
|
||||
|
||||
float x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
float y2[] = { -2.0, 5.5, 6.0, 8.0, -2.0, 5.5, 6.0, 8.0, 8.0,
|
||||
11.5, 4.0, 6.0, 6.0, 9.5, 4.0, 6.0, -2.0, 17.5,
|
||||
2.0, 4.0, -2.0, 13.5, 2.0, 4.0, -2.0, 13.5, 10.0,
|
||||
2.0, -2.0, 9.5, 8.0, 2.0, -2.0, 9.5, 8.0, 10.0 };
|
||||
|
||||
for(i = 0; i < 4 * N; i++){
|
||||
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], SINGLE_EPS);
|
||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], SINGLE_EPS);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_COMPLEX16
|
||||
CTEST(axpby, zaxpby_inc_0)
|
||||
{
|
||||
blasint i;
|
||||
blasint N = 9, incX = 0, incY = 0;
|
||||
double alpha[] = { 1.0, 2.0 }, beta[] = { 2.0, 1.0 };
|
||||
double x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
double y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0,
|
||||
2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
BLASFUNC(zaxpby)(&N, alpha, x1, &incX, beta, y1, &incY);
|
||||
|
||||
double x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
double y2[] = { 9355.0, -8865.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0,
|
||||
10.0, 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
for(i = 0; i < 2 * N; i++){
|
||||
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], DOUBLE_EPS);
|
||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
CTEST(axpby, zaxpby_inc_1)
|
||||
{
|
||||
blasint i;
|
||||
blasint N = 9, incX = 1, incY = 1;
|
||||
double alpha[] = { 0.25, 0.25 }, beta[] = { 0.75, 0.75 };
|
||||
double x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
double y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0,
|
||||
2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
BLASFUNC(zaxpby)(&N, alpha, x1, &incX, beta, y1, &incY);
|
||||
|
||||
double x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
double y2[] = { -2.0, 5.5, -2.0, 13.5, -2.0, 5.5, -2.0, 13.5,
|
||||
8.0, 11.5, -2.0, 9.5, 6.0, 9.5, -2.0, 9.5, -2.0, 17.5 };
|
||||
|
||||
for(i = 0; i < 2 * N; i++){
|
||||
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], DOUBLE_EPS);
|
||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
|
||||
}
|
||||
}
|
||||
|
||||
CTEST(axpby, zaxpby_inc_2)
|
||||
{
|
||||
blasint i;
|
||||
blasint N = 9, incX = 2, incY = 2;
|
||||
double alpha[] = { 0.25, 0.25 }, beta[] = { 0.75, 0.75 };
|
||||
double x1[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
double y1[] = { 2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0,
|
||||
2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0,
|
||||
2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0,
|
||||
2.0, 4.0, 6.0, 8.0, 2.0, 4.0, 6.0, 8.0, 10.0 };
|
||||
|
||||
BLASFUNC(zaxpby)(&N, &alpha, x1, &incX, &beta, y1, &incY);
|
||||
|
||||
double x2[] = { 1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0,
|
||||
1.0, 3.0, 5.0, 7.0, 1.0, 3.0, 5.0, 7.0, 9.0 };
|
||||
double y2[] = { -2.0, 5.5, 6.0, 8.0, -2.0, 5.5, 6.0, 8.0, 8.0,
|
||||
11.5, 4.0, 6.0, 6.0, 9.5, 4.0, 6.0, -2.0, 17.5,
|
||||
2.0, 4.0, -2.0, 13.5, 2.0, 4.0, -2.0, 13.5, 10.0,
|
||||
2.0, -2.0, 9.5, 8.0, 2.0, -2.0, 9.5, 8.0, 10.0 };
|
||||
|
||||
for(i = 0; i < 4 * N; i++){
|
||||
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], DOUBLE_EPS);
|
||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user