Merge branch 'develop' into dev/slewis/merge-from-riscv
This commit is contained in:
commit
3ffd6868d7
6888
benchmark/Makefile
6888
benchmark/Makefile
File diff suppressed because it is too large
Load Diff
13
cblas.h
13
cblas.h
|
@ -101,6 +101,16 @@ CBLAS_INDEX cblas_idamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPE
|
||||||
CBLAS_INDEX cblas_icamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
CBLAS_INDEX cblas_icamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||||
CBLAS_INDEX cblas_izamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
CBLAS_INDEX cblas_izamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||||
|
|
||||||
|
float cblas_samax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||||
|
double cblas_damax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||||
|
float cblas_scamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||||
|
double cblas_dzamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||||
|
|
||||||
|
float cblas_samin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||||
|
double cblas_damin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||||
|
float cblas_scamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||||
|
double cblas_dzamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||||
|
|
||||||
CBLAS_INDEX cblas_ismax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
CBLAS_INDEX cblas_ismax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
|
||||||
CBLAS_INDEX cblas_idmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
CBLAS_INDEX cblas_idmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
|
||||||
CBLAS_INDEX cblas_icmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
CBLAS_INDEX cblas_icmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
|
||||||
|
@ -116,6 +126,9 @@ void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS
|
||||||
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||||
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||||
|
|
||||||
|
void cblas_caxpyc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||||
|
void cblas_zaxpyc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||||
|
|
||||||
void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
|
||||||
void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
|
||||||
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
|
||||||
|
|
|
@ -130,6 +130,8 @@ endif ()
|
||||||
foreach (float_type ${FLOAT_TYPES})
|
foreach (float_type ${FLOAT_TYPES})
|
||||||
|
|
||||||
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
|
||||||
|
GenerateNamedObjects("zaxpy.c" "" "axpyc" ${CBLAS_FLAG} "" "" false ${float_type})
|
||||||
|
|
||||||
GenerateNamedObjects("zger.c" "" "geru" ${CBLAS_FLAG} "" "" false ${float_type})
|
GenerateNamedObjects("zger.c" "" "geru" ${CBLAS_FLAG} "" "" false ${float_type})
|
||||||
GenerateNamedObjects("zger.c" "CONJ" "gerc" ${CBLAS_FLAG} "" "" false ${float_type})
|
GenerateNamedObjects("zger.c" "CONJ" "gerc" ${CBLAS_FLAG} "" "" false ${float_type})
|
||||||
GenerateNamedObjects("zdot.c" "CONJ" "dotc" ${CBLAS_FLAG} "" "" false ${float_type})
|
GenerateNamedObjects("zdot.c" "CONJ" "dotc" ${CBLAS_FLAG} "" "" false ${float_type})
|
||||||
|
|
|
@ -270,7 +270,8 @@ CSBLAS1OBJS = \
|
||||||
cblas_scopy.$(SUFFIX) cblas_sdot.$(SUFFIX) cblas_sdsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) \
|
cblas_scopy.$(SUFFIX) cblas_sdot.$(SUFFIX) cblas_sdsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) \
|
||||||
cblas_srot.$(SUFFIX) cblas_srotg.$(SUFFIX) cblas_srotm.$(SUFFIX) cblas_srotmg.$(SUFFIX) \
|
cblas_srot.$(SUFFIX) cblas_srotg.$(SUFFIX) cblas_srotm.$(SUFFIX) cblas_srotmg.$(SUFFIX) \
|
||||||
cblas_sscal.$(SUFFIX) cblas_sswap.$(SUFFIX) cblas_snrm2.$(SUFFIX) cblas_saxpby.$(SUFFIX) \
|
cblas_sscal.$(SUFFIX) cblas_sswap.$(SUFFIX) cblas_snrm2.$(SUFFIX) cblas_saxpby.$(SUFFIX) \
|
||||||
cblas_ismin.$(SUFFIX) cblas_ismax.$(SUFFIX) cblas_ssum.$(SUFFIX)
|
cblas_ismin.$(SUFFIX) cblas_ismax.$(SUFFIX) cblas_ssum.$(SUFFIX) cblas_samax.$(SUFFIX) \
|
||||||
|
cblas_samin.$(SUFFIX)
|
||||||
|
|
||||||
CSBLAS2OBJS = \
|
CSBLAS2OBJS = \
|
||||||
cblas_sgemv.$(SUFFIX) cblas_sger.$(SUFFIX) cblas_ssymv.$(SUFFIX) cblas_strmv.$(SUFFIX) \
|
cblas_sgemv.$(SUFFIX) cblas_sger.$(SUFFIX) cblas_ssymv.$(SUFFIX) cblas_strmv.$(SUFFIX) \
|
||||||
|
@ -295,7 +296,8 @@ CDBLAS1OBJS = \
|
||||||
cblas_dcopy.$(SUFFIX) cblas_ddot.$(SUFFIX) \
|
cblas_dcopy.$(SUFFIX) cblas_ddot.$(SUFFIX) \
|
||||||
cblas_drot.$(SUFFIX) cblas_drotg.$(SUFFIX) cblas_drotm.$(SUFFIX) cblas_drotmg.$(SUFFIX) \
|
cblas_drot.$(SUFFIX) cblas_drotg.$(SUFFIX) cblas_drotm.$(SUFFIX) cblas_drotmg.$(SUFFIX) \
|
||||||
cblas_dscal.$(SUFFIX) cblas_dswap.$(SUFFIX) cblas_dnrm2.$(SUFFIX) cblas_daxpby.$(SUFFIX) \
|
cblas_dscal.$(SUFFIX) cblas_dswap.$(SUFFIX) cblas_dnrm2.$(SUFFIX) cblas_daxpby.$(SUFFIX) \
|
||||||
cblas_idmin.$(SUFFIX) cblas_idmax.$(SUFFIX) cblas_dsum.$(SUFFIX)
|
cblas_idmin.$(SUFFIX) cblas_idmax.$(SUFFIX) cblas_dsum.$(SUFFIX) cblas_damax.$(SUFFIX) \
|
||||||
|
cblas_damin.$(SUFFIX)
|
||||||
|
|
||||||
CDBLAS2OBJS = \
|
CDBLAS2OBJS = \
|
||||||
cblas_dgemv.$(SUFFIX) cblas_dger.$(SUFFIX) cblas_dsymv.$(SUFFIX) cblas_dtrmv.$(SUFFIX) \
|
cblas_dgemv.$(SUFFIX) cblas_dger.$(SUFFIX) cblas_dsymv.$(SUFFIX) cblas_dtrmv.$(SUFFIX) \
|
||||||
|
@ -315,7 +317,7 @@ CCBLAS1OBJS = \
|
||||||
cblas_cdotc_sub.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) \
|
cblas_cdotc_sub.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) \
|
||||||
cblas_cscal.$(SUFFIX) cblas_csscal.$(SUFFIX) \
|
cblas_cscal.$(SUFFIX) cblas_csscal.$(SUFFIX) \
|
||||||
cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX) \
|
cblas_cswap.$(SUFFIX) cblas_scnrm2.$(SUFFIX) \
|
||||||
cblas_caxpby.$(SUFFIX) \
|
cblas_caxpby.$(SUFFIX) cblas_scamax.$(SUFFIX) cblas_caxpyc.$(SUFFIX) cblas_scamin.$(SUFFIX) \
|
||||||
cblas_icmin.$(SUFFIX) cblas_icmax.$(SUFFIX) cblas_scsum.$(SUFFIX) cblas_csrot.$(SUFFIX) cblas_crotg.$(SUFFIX)
|
cblas_icmin.$(SUFFIX) cblas_icmax.$(SUFFIX) cblas_scsum.$(SUFFIX) cblas_csrot.$(SUFFIX) cblas_crotg.$(SUFFIX)
|
||||||
|
|
||||||
CCBLAS2OBJS = \
|
CCBLAS2OBJS = \
|
||||||
|
@ -340,12 +342,12 @@ CXERBLAOBJ = \
|
||||||
|
|
||||||
CZBLAS1OBJS = \
|
CZBLAS1OBJS = \
|
||||||
cblas_izamax.$(SUFFIX) cblas_izamin.$(SUFFIX) cblas_dzasum.$(SUFFIX) cblas_zaxpy.$(SUFFIX) \
|
cblas_izamax.$(SUFFIX) cblas_izamin.$(SUFFIX) cblas_dzasum.$(SUFFIX) cblas_zaxpy.$(SUFFIX) \
|
||||||
cblas_zcopy.$(SUFFIX) \
|
cblas_zcopy.$(SUFFIX) cblas_dzamax.$(SUFFIX) cblas_dzamin.$(SUFFIX) \
|
||||||
cblas_zdotc.$(SUFFIX) cblas_zdotu.$(SUFFIX) \
|
cblas_zdotc.$(SUFFIX) cblas_zdotu.$(SUFFIX) \
|
||||||
cblas_zdotc_sub.$(SUFFIX) cblas_zdotu_sub.$(SUFFIX) \
|
cblas_zdotc_sub.$(SUFFIX) cblas_zdotu_sub.$(SUFFIX) \
|
||||||
cblas_zscal.$(SUFFIX) cblas_zdscal.$(SUFFIX) \
|
cblas_zscal.$(SUFFIX) cblas_zdscal.$(SUFFIX) \
|
||||||
cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX) \
|
cblas_zswap.$(SUFFIX) cblas_dznrm2.$(SUFFIX) \
|
||||||
cblas_zaxpby.$(SUFFIX) \
|
cblas_zaxpby.$(SUFFIX) cblas_zaxpyc.$(SUFFIX) \
|
||||||
cblas_izmin.$(SUFFIX) cblas_izmax.$(SUFFIX) cblas_dzsum.$(SUFFIX) cblas_zdrot.$(SUFFIX) cblas_zrotg.$(SUFFIX)
|
cblas_izmin.$(SUFFIX) cblas_izmax.$(SUFFIX) cblas_dzsum.$(SUFFIX) cblas_zdrot.$(SUFFIX) cblas_zrotg.$(SUFFIX)
|
||||||
|
|
||||||
|
|
||||||
|
@ -1533,6 +1535,30 @@ cblas_icmin.$(SUFFIX) cblas_icmin.$(PSUFFIX) : imax.c
|
||||||
cblas_izmin.$(SUFFIX) cblas_izmin.$(PSUFFIX) : imax.c
|
cblas_izmin.$(SUFFIX) cblas_izmin.$(PSUFFIX) : imax.c
|
||||||
$(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F)
|
$(CC) $(CFLAGS) -DCBLAS -c -UUSE_ABS -DUSE_MIN $< -o $(@F)
|
||||||
|
|
||||||
|
cblas_samax.$(SUFFIX) cblas_samax.$(PSUFFIX) : max.c
|
||||||
|
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F)
|
||||||
|
|
||||||
|
cblas_damax.$(SUFFIX) cblas_damax.$(PSUFFIX) : max.c
|
||||||
|
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F)
|
||||||
|
|
||||||
|
cblas_scamax.$(SUFFIX) cblas_scamax.$(PSUFFIX) : max.c
|
||||||
|
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F)
|
||||||
|
|
||||||
|
cblas_dzamax.$(SUFFIX) cblas_dzamax.$(PSUFFIX) : max.c
|
||||||
|
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -UUSE_MIN $< -o $(@F)
|
||||||
|
|
||||||
|
cblas_samin.$(SUFFIX) cblas_samin.$(PSUFFIX) : max.c
|
||||||
|
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F)
|
||||||
|
|
||||||
|
cblas_damin.$(SUFFIX) cblas_damin.$(PSUFFIX) : max.c
|
||||||
|
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F)
|
||||||
|
|
||||||
|
cblas_scamin.$(SUFFIX) cblas_scamin.$(PSUFFIX) : max.c
|
||||||
|
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F)
|
||||||
|
|
||||||
|
cblas_dzamin.$(SUFFIX) cblas_dzamin.$(PSUFFIX) : max.c
|
||||||
|
$(CC) $(CFLAGS) -DCBLAS -c -DUSE_ABS -DUSE_MIN $< -o $(@F)
|
||||||
|
|
||||||
cblas_sasum.$(SUFFIX) cblas_sasum.$(PSUFFIX) : asum.c
|
cblas_sasum.$(SUFFIX) cblas_sasum.$(PSUFFIX) : asum.c
|
||||||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
||||||
|
|
||||||
|
@ -1627,6 +1653,19 @@ cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c
|
||||||
cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c
|
cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c
|
||||||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
||||||
|
|
||||||
|
cblas_caxpyc.$(SUFFIX) cblas_caxpyc.$(PSUFFIX) : zaxpy.c
|
||||||
|
$(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F)
|
||||||
|
|
||||||
|
cblas_zaxpyc.$(SUFFIX) cblas_zaxpyc.$(PSUFFIX) : zaxpy.c
|
||||||
|
$(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F)
|
||||||
|
|
||||||
|
cblas_xaxpyc.$(SUFFIX) cblas_xaxpyc.$(PSUFFIX) : zaxpy.c
|
||||||
|
$(CC) $(CFLAGS) -DCBLAS -c -DCONJ $< -o $(@F)
|
||||||
|
|
||||||
|
sscal.$(SUFFIX) sscal.$(PSUFFIX) : scal.c
|
||||||
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
|
||||||
|
dscal.$(SUFFIX) dscal.$(PSUFFIX) : scal.c
|
||||||
cblas_zaxpy.$(SUFFIX) cblas_zaxpy.$(PSUFFIX) : zaxpy.c
|
cblas_zaxpy.$(SUFFIX) cblas_zaxpy.$(PSUFFIX) : zaxpy.c
|
||||||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
||||||
|
|
||||||
|
|
|
@ -145,8 +145,13 @@ FLOATRET NAME(blasint *N, FLOAT *x, blasint *INCX){
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
#ifdef COMPLEX
|
||||||
|
FLOAT CNAME(blasint n, void *vx, blasint incx){
|
||||||
|
FLOAT *x = (FLOAT*) vx;
|
||||||
|
#else
|
||||||
FLOAT CNAME(blasint n, FLOAT *x, blasint incx){
|
FLOAT CNAME(blasint n, FLOAT *x, blasint incx){
|
||||||
|
#endif
|
||||||
|
|
||||||
FLOAT ret;
|
FLOAT ret;
|
||||||
|
|
||||||
PRINT_DEBUG_CNAME;
|
PRINT_DEBUG_CNAME;
|
||||||
|
|
|
@ -14,10 +14,12 @@ ZSCALKERNEL = cscal_lsx.S
|
||||||
SAMAXKERNEL = amax_lsx.S
|
SAMAXKERNEL = amax_lsx.S
|
||||||
DAMAXKERNEL = amax_lsx.S
|
DAMAXKERNEL = amax_lsx.S
|
||||||
CAMAXKERNEL = camax_lsx.S
|
CAMAXKERNEL = camax_lsx.S
|
||||||
|
ZAMAXKERNEL = camax_lsx.S
|
||||||
|
|
||||||
SAMINKERNEL = amin_lsx.S
|
SAMINKERNEL = amin_lsx.S
|
||||||
DAMINKERNEL = amin_lsx.S
|
DAMINKERNEL = amin_lsx.S
|
||||||
CAMINKERNEL = camin_lsx.S
|
CAMINKERNEL = camin_lsx.S
|
||||||
|
ZAMINKERNEL = camin_lsx.S
|
||||||
|
|
||||||
SMAXKERNEL = max_lsx.S
|
SMAXKERNEL = max_lsx.S
|
||||||
DMAXKERNEL = max_lsx.S
|
DMAXKERNEL = max_lsx.S
|
||||||
|
|
|
@ -14,10 +14,12 @@ ZSCALKERNEL = cscal_lasx.S
|
||||||
SAMAXKERNEL = amax_lasx.S
|
SAMAXKERNEL = amax_lasx.S
|
||||||
DAMAXKERNEL = amax_lasx.S
|
DAMAXKERNEL = amax_lasx.S
|
||||||
CAMAXKERNEL = camax_lasx.S
|
CAMAXKERNEL = camax_lasx.S
|
||||||
|
ZAMAXKERNEL = camax_lasx.S
|
||||||
|
|
||||||
SAMINKERNEL = amin_lasx.S
|
SAMINKERNEL = amin_lasx.S
|
||||||
DAMINKERNEL = amin_lasx.S
|
DAMINKERNEL = amin_lasx.S
|
||||||
CAMINKERNEL = camin_lasx.S
|
CAMINKERNEL = camin_lasx.S
|
||||||
|
ZAMINKERNEL = camin_lasx.S
|
||||||
|
|
||||||
SMAXKERNEL = max_lsx.S
|
SMAXKERNEL = max_lsx.S
|
||||||
DMAXKERNEL = max_lsx.S
|
DMAXKERNEL = max_lsx.S
|
||||||
|
|
|
@ -66,7 +66,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#else
|
#else
|
||||||
xvldrepl.w VM0, X, 0
|
xvldrepl.w VM0, X, 0
|
||||||
#endif
|
#endif
|
||||||
XVFSUB VM0, VM0, VM0
|
|
||||||
bne INCX, TEMP, .L20
|
bne INCX, TEMP, .L20
|
||||||
|
|
||||||
srai.d I, N, 4
|
srai.d I, N, 4
|
||||||
|
|
|
@ -66,7 +66,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#else
|
#else
|
||||||
vldrepl.w VM0, X, 0
|
vldrepl.w VM0, X, 0
|
||||||
#endif
|
#endif
|
||||||
VFSUB VM0, VM0, VM0
|
|
||||||
bne INCX, TEMP, .L20
|
bne INCX, TEMP, .L20
|
||||||
|
|
||||||
srai.d I, N, 3
|
srai.d I, N, 3
|
||||||
|
|
|
@ -63,42 +63,60 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
bge $r0, N, .L999
|
bge $r0, N, .L999
|
||||||
bge $r0, INCX, .L999
|
bge $r0, INCX, .L999
|
||||||
li.d TEMP, 1
|
li.d TEMP, 1
|
||||||
li.w I, -1
|
|
||||||
slli.d TEMP, TEMP, ZBASE_SHIFT
|
slli.d TEMP, TEMP, ZBASE_SHIFT
|
||||||
slli.d INCX, INCX, ZBASE_SHIFT
|
slli.d INCX, INCX, ZBASE_SHIFT
|
||||||
xvreplgr2vr.w neg1, I
|
|
||||||
xvffint.s.w neg1, neg1
|
|
||||||
srai.d I, N, 3
|
srai.d I, N, 3
|
||||||
bne INCX, TEMP, .L20
|
bne INCX, TEMP, .L20
|
||||||
bge $r0, I, .L23
|
bge $r0, I, .L23
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L10:
|
.L10:
|
||||||
xvld VX0, X, 0 * SIZE
|
xvld VX0, X, 0
|
||||||
xvld VX1, X, 8 * SIZE
|
xvld VX1, X, 32
|
||||||
addi.d I, I, -1
|
#ifdef DOUBLE
|
||||||
|
xvpickev.d x1, VX1, VX0
|
||||||
|
xvpickod.d x2, VX1, VX0
|
||||||
|
#else
|
||||||
xvpickev.w x1, VX1, VX0
|
xvpickev.w x1, VX1, VX0
|
||||||
xvpickod.w x2, VX1, VX0
|
xvpickod.w x2, VX1, VX0
|
||||||
xvfmul.s x3, neg1, x1
|
#endif
|
||||||
xvfmul.s x4, neg1, x2
|
XVFSUB x3, res0, x1
|
||||||
xvfcmp.clt.s VT0, x1, res0
|
XVFSUB x4, res0, x2
|
||||||
xvfcmp.clt.s VT1, x2, res0
|
XVFMAX x1, x1, x3
|
||||||
xvbitsel.v x1, x1, x3, VT0
|
XVFMAX x2, x2, x4
|
||||||
xvbitsel.v x2, x2, x4, VT1
|
XVFADD VM1, x1, x2
|
||||||
|
XVFMAX VM0, VM0, VM1
|
||||||
|
#ifdef DOUBLE
|
||||||
|
xvld VX0, X, 64
|
||||||
|
xvld VX1, X, 96
|
||||||
|
xvpickev.d x1, VX1, VX0
|
||||||
|
xvpickod.d x2, VX1, VX0
|
||||||
|
XVFSUB x3, res0, x1
|
||||||
|
XVFSUB x4, res0, x2
|
||||||
|
XVFMAX x1, x1, x3
|
||||||
|
XVFMAX x2, x2, x4
|
||||||
|
XVFADD VM1, x1, x2
|
||||||
|
XVFMAX VM0, VM0, VM1
|
||||||
|
#endif
|
||||||
|
addi.d I, I, -1
|
||||||
addi.d X, X, 16 * SIZE
|
addi.d X, X, 16 * SIZE
|
||||||
xvfadd.s VM1, x1, x2
|
|
||||||
xvfmax.s VM0, VM0, VM1
|
|
||||||
blt $r0, I, .L10
|
blt $r0, I, .L10
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L11:
|
.L11:
|
||||||
|
#ifdef DOUBLE
|
||||||
|
xvpickve.d x1, VM0, 0
|
||||||
|
xvpickve.d x2, VM0, 1
|
||||||
|
XVFMAX VM0, x1, x2
|
||||||
|
#else
|
||||||
xvpickve.w x1, VM0, 0
|
xvpickve.w x1, VM0, 0
|
||||||
xvpickve.w x2, VM0, 1
|
xvpickve.w x2, VM0, 1
|
||||||
xvpickve.w x3, VM0, 2
|
xvpickve.w x3, VM0, 2
|
||||||
xvpickve.w x4, VM0, 3
|
xvpickve.w x4, VM0, 3
|
||||||
xvfmax.s VM1, x1, x2
|
XVFMAX VM0, x1, x2
|
||||||
xvfmax.s VM0, x3, x4
|
XVFMAX VM1, x3, x4
|
||||||
xvfmax.s VM0, VM0, VM1
|
XVFMAX VM0, VM0, VM1
|
||||||
|
#endif
|
||||||
b .L23
|
b .L23
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
@ -107,66 +125,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L21:
|
.L21:
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmax.s s1, t1, t3
|
FMAX s1, t1, t3
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmax.s s1, t1, t3
|
FMAX s1, t1, t3
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
addi.d I, I, -1
|
addi.d I, I, -1
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmax.s s3, t1, t3
|
FMAX s3, t1, t3
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmax.s s4, t1, t3
|
FMAX s4, t1, t3
|
||||||
blt $r0, I, .L21
|
blt $r0, I, .L21
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L22:
|
.L22:
|
||||||
fmax.s s1, s1, s2
|
FMAX s1, s1, s2
|
||||||
fmax.s s3, s3, s4
|
FMAX s3, s3, s4
|
||||||
fmax.s s1, s1, s3
|
FMAX s1, s1, s3
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L23: //N<8
|
.L23: //N<8
|
||||||
|
@ -182,12 +200,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
FABS a1, a1
|
FABS a1, a1
|
||||||
ADD a0, a0, a1
|
ADD a0, a0, a1
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fmax.s s1, a0, s1
|
FMAX s1, a0, s1
|
||||||
blt $r0, I, .L24
|
blt $r0, I, .L24
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
fmov.s $f0, $f22
|
MOV $f0, $f22
|
||||||
jirl $r0, $r1, 0x0
|
jirl $r0, $r1, 0x0
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
|
|
@ -63,54 +63,87 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
bge $r0, N, .L999
|
bge $r0, N, .L999
|
||||||
bge $r0, INCX, .L999
|
bge $r0, INCX, .L999
|
||||||
li.d TEMP, 1
|
li.d TEMP, 1
|
||||||
li.w I, -1
|
|
||||||
slli.d TEMP, TEMP, ZBASE_SHIFT
|
slli.d TEMP, TEMP, ZBASE_SHIFT
|
||||||
slli.d INCX, INCX, ZBASE_SHIFT
|
slli.d INCX, INCX, ZBASE_SHIFT
|
||||||
vreplgr2vr.w neg1, I
|
|
||||||
vffint.s.w neg1, neg1
|
|
||||||
srai.d I, N, 3
|
srai.d I, N, 3
|
||||||
bne INCX, TEMP, .L20
|
bne INCX, TEMP, .L20
|
||||||
bge $r0, I, .L23
|
bge $r0, I, .L23
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L10:
|
.L10:
|
||||||
vld VX0, X, 0 * SIZE
|
vld VX0, X, 0
|
||||||
vld VX1, X, 4 * SIZE
|
vld VX1, X, 16
|
||||||
addi.d I, I, -1
|
#ifdef DOUBLE
|
||||||
|
vpickev.d x1, VX1, VX0
|
||||||
|
vpickod.d x2, VX1, VX0
|
||||||
|
#else
|
||||||
vpickev.w x1, VX1, VX0
|
vpickev.w x1, VX1, VX0
|
||||||
vpickod.w x2, VX1, VX0
|
vpickod.w x2, VX1, VX0
|
||||||
vfmul.s x3, neg1, x1
|
#endif
|
||||||
vfmul.s x4, neg1, x2
|
VFSUB x3, res0, x1
|
||||||
vfcmp.clt.s VT0, x1, res0
|
VFSUB x4, res0, x2
|
||||||
vfcmp.clt.s VT1, x2, res0
|
VFMAX x1, x1, x3
|
||||||
vld VX0, X, 8 * SIZE
|
VFMAX x2, x2, x4
|
||||||
vbitsel.v x1, x1, x3, VT0
|
VFADD VM1, x1, x2
|
||||||
vbitsel.v x2, x2, x4, VT1
|
|
||||||
vld VX1, X, 12 * SIZE
|
vld VX0, X, 32
|
||||||
vfadd.s VM1, x1, x2
|
vld VX1, X, 48
|
||||||
|
#ifdef DOUBLE
|
||||||
|
vpickev.d x1, VX1, VX0
|
||||||
|
vpickod.d x2, VX1, VX0
|
||||||
|
#else
|
||||||
vpickev.w x1, VX1, VX0
|
vpickev.w x1, VX1, VX0
|
||||||
vpickod.w x2, VX1, VX0
|
vpickod.w x2, VX1, VX0
|
||||||
vfmul.s x3, neg1, x1
|
#endif
|
||||||
vfmul.s x4, neg1, x2
|
VFSUB x3, res0, x1
|
||||||
vfcmp.clt.s VT0, x1, res0
|
VFSUB x4, res0, x2
|
||||||
vfcmp.clt.s VT1, x2, res0
|
VFMAX x1, x1, x3
|
||||||
|
VFMAX x2, x2, x4
|
||||||
|
VFADD x1, x1, x2
|
||||||
|
VFMAX VM1, x1, VM1
|
||||||
|
VFMAX VM0, VM0, VM1
|
||||||
|
#ifdef DOUBLE
|
||||||
|
vld VX0, X, 64
|
||||||
|
vld VX1, X, 80
|
||||||
|
vpickev.d x1, VX1, VX0
|
||||||
|
vpickod.d x2, VX1, VX0
|
||||||
|
VFSUB x3, res0, x1
|
||||||
|
VFSUB x4, res0, x2
|
||||||
|
VFMAX x1, x1, x3
|
||||||
|
VFMAX x2, x2, x4
|
||||||
|
VFADD VM1, x1, x2
|
||||||
|
|
||||||
|
vld VX0, X, 96
|
||||||
|
vld VX1, X, 112
|
||||||
|
vpickev.d x1, VX1, VX0
|
||||||
|
vpickod.d x2, VX1, VX0
|
||||||
|
VFSUB x3, res0, x1
|
||||||
|
VFSUB x4, res0, x2
|
||||||
|
VFMAX x1, x1, x3
|
||||||
|
VFMAX x2, x2, x4
|
||||||
|
VFADD x1, x1, x2
|
||||||
|
VFMAX VM1, x1, VM1
|
||||||
|
VFMAX VM0, VM0, VM1
|
||||||
|
#endif
|
||||||
addi.d X, X, 16 * SIZE
|
addi.d X, X, 16 * SIZE
|
||||||
vbitsel.v x1, x1, x3, VT0
|
addi.d I, I, -1
|
||||||
vbitsel.v x2, x2, x4, VT1
|
|
||||||
vfadd.s x1, x1, x2
|
|
||||||
vfmax.s VM1, x1, VM1
|
|
||||||
vfmax.s VM0, VM0, VM1
|
|
||||||
blt $r0, I, .L10
|
blt $r0, I, .L10
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L11:
|
.L11:
|
||||||
|
#ifdef DOUBLE
|
||||||
|
vreplvei.d x1, VM0, 0
|
||||||
|
vreplvei.d x2, VM0, 1
|
||||||
|
VFMAX VM0, x1, x2
|
||||||
|
#else
|
||||||
vreplvei.w x1, VM0, 0
|
vreplvei.w x1, VM0, 0
|
||||||
vreplvei.w x2, VM0, 1
|
vreplvei.w x2, VM0, 1
|
||||||
vreplvei.w x3, VM0, 2
|
vreplvei.w x3, VM0, 2
|
||||||
vreplvei.w x4, VM0, 3
|
vreplvei.w x4, VM0, 3
|
||||||
vfmax.s VM1, x1, x2
|
VFMAX VM1, x1, x2
|
||||||
vfmax.s VM0, x3, x4
|
VFMAX VM0, x3, x4
|
||||||
vfmax.s VM0, VM0, VM1
|
VFMAX VM0, VM0, VM1
|
||||||
|
#endif
|
||||||
b .L23
|
b .L23
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
@ -119,66 +152,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L21:
|
.L21:
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmax.s s1, t1, t3
|
FMAX s1, t1, t3
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmax.s s1, t1, t3
|
FMAX s1, t1, t3
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
addi.d I, I, -1
|
addi.d I, I, -1
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmax.s s3, t1, t3
|
FMAX s3, t1, t3
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmax.s s4, t1, t3
|
FMAX s4, t1, t3
|
||||||
blt $r0, I, .L21
|
blt $r0, I, .L21
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L22:
|
.L22:
|
||||||
fmax.s s1, s1, s2
|
FMAX s1, s1, s2
|
||||||
fmax.s s3, s3, s4
|
FMAX s3, s3, s4
|
||||||
fmax.s s1, s1, s3
|
FMAX s1, s1, s3
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L23: //N<8
|
.L23: //N<8
|
||||||
|
@ -187,19 +220,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L24:
|
.L24:
|
||||||
fld.s a0, X, 0 * SIZE
|
LD a0, X, 0 * SIZE
|
||||||
fld.s a1, X, 1 * SIZE
|
LD a1, X, 1 * SIZE
|
||||||
addi.d I, I, -1
|
addi.d I, I, -1
|
||||||
fabs.s a0, a0
|
FABS a0, a0
|
||||||
fabs.s a1, a1
|
FABS a1, a1
|
||||||
fadd.s a0, a0, a1
|
ADD a0, a0, a1
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fmax.s s1, a0, s1
|
FMAX s1, a0, s1
|
||||||
blt $r0, I, .L24
|
blt $r0, I, .L24
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
fmov.s $f0, $f22
|
MOV $f0, $f22
|
||||||
jirl $r0, $r1, 0x0
|
jirl $r0, $r1, 0x0
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
|
|
@ -61,49 +61,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
xvxor.v res0, res0, res0
|
xvxor.v res0, res0, res0
|
||||||
bge $r0, N, .L999
|
bge $r0, N, .L999
|
||||||
bge $r0, INCX, .L999
|
bge $r0, INCX, .L999
|
||||||
fld.s a0, X, 0 * SIZE
|
LD a0, X, 0 * SIZE
|
||||||
fld.s a1, X, 1 * SIZE
|
LD a1, X, 1 * SIZE
|
||||||
fabs.s a0, a0
|
FABS a0, a0
|
||||||
fabs.s a1, a1
|
FABS a1, a1
|
||||||
fadd.s s1, a1, a0
|
ADD s1, a1, a0
|
||||||
|
#ifdef DOUBLE
|
||||||
|
xvreplve0.d VM0, VM0
|
||||||
|
#else
|
||||||
xvreplve0.w VM0, VM0
|
xvreplve0.w VM0, VM0
|
||||||
|
#endif
|
||||||
li.d TEMP, 1
|
li.d TEMP, 1
|
||||||
li.w I, -1
|
|
||||||
slli.d TEMP, TEMP, ZBASE_SHIFT
|
slli.d TEMP, TEMP, ZBASE_SHIFT
|
||||||
slli.d INCX, INCX, ZBASE_SHIFT
|
slli.d INCX, INCX, ZBASE_SHIFT
|
||||||
xvreplgr2vr.w neg1, I
|
|
||||||
xvffint.s.w neg1, neg1
|
|
||||||
srai.d I, N, 3
|
srai.d I, N, 3
|
||||||
bne INCX, TEMP, .L20
|
bne INCX, TEMP, .L20
|
||||||
bge $r0, I, .L23
|
bge $r0, I, .L23
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L10:
|
.L10:
|
||||||
xvld VX0, X, 0 * SIZE
|
xvld VX0, X, 0
|
||||||
xvld VX1, X, 8 * SIZE
|
xvld VX1, X, 32
|
||||||
addi.d I, I, -1
|
#ifdef DOUBLE
|
||||||
|
xvpickev.d x1, VX1, VX0
|
||||||
|
xvpickod.d x2, VX1, VX0
|
||||||
|
#else
|
||||||
xvpickev.w x1, VX1, VX0
|
xvpickev.w x1, VX1, VX0
|
||||||
xvpickod.w x2, VX1, VX0
|
xvpickod.w x2, VX1, VX0
|
||||||
xvfmul.s x3, neg1, x1
|
#endif
|
||||||
xvfmul.s x4, neg1, x2
|
XVFSUB x3, res0, x1
|
||||||
xvfcmp.clt.s VT0, x1, res0
|
XVFSUB x4, res0, x2
|
||||||
xvfcmp.clt.s VT1, x2, res0
|
XVFMAX x1, x1, x3
|
||||||
xvbitsel.v x1, x1, x3, VT0
|
XVFMAX x2, x2, x4
|
||||||
xvbitsel.v x2, x2, x4, VT1
|
XVFADD VM1, x1, x2
|
||||||
|
XVFMIN VM0, VM0, VM1
|
||||||
|
#ifdef DOUBLE
|
||||||
|
xvld VX0, X, 64
|
||||||
|
xvld VX1, X, 96
|
||||||
|
xvpickev.d x1, VX1, VX0
|
||||||
|
xvpickod.d x2, VX1, VX0
|
||||||
|
XVFSUB x3, res0, x1
|
||||||
|
XVFSUB x4, res0, x2
|
||||||
|
XVFMAX x1, x1, x3
|
||||||
|
XVFMAX x2, x2, x4
|
||||||
|
XVFADD VM1, x1, x2
|
||||||
|
XVFMIN VM0, VM0, VM1
|
||||||
|
#endif
|
||||||
|
addi.d I, I, -1
|
||||||
addi.d X, X, 16 * SIZE
|
addi.d X, X, 16 * SIZE
|
||||||
xvfadd.s VM1, x1, x2
|
|
||||||
xvfmin.s VM0, VM0, VM1
|
|
||||||
blt $r0, I, .L10
|
blt $r0, I, .L10
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L11:
|
.L11:
|
||||||
|
#ifdef DOUBLE
|
||||||
|
xvpickve.d x1, VM0, 0
|
||||||
|
xvpickve.d x2, VM0, 1
|
||||||
|
XVFMIN VM0, x1, x2
|
||||||
|
#else
|
||||||
xvpickve.w x1, VM0, 0
|
xvpickve.w x1, VM0, 0
|
||||||
xvpickve.w x2, VM0, 1
|
xvpickve.w x2, VM0, 1
|
||||||
xvpickve.w x3, VM0, 2
|
xvpickve.w x3, VM0, 2
|
||||||
xvpickve.w x4, VM0, 3
|
xvpickve.w x4, VM0, 3
|
||||||
xvfmin.s VM1, x1, x2
|
XVFMIN VM0, x1, x2
|
||||||
xvfmin.s VM0, x3, x4
|
XVFMIN VM1, x3, x4
|
||||||
xvfmin.s VM0, VM0, VM1
|
XVFMIN VM0, VM0, VM1
|
||||||
|
#endif
|
||||||
b .L23
|
b .L23
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
@ -112,66 +134,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L21:
|
.L21:
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmin.s s1, t1, t3
|
FMIN s1, t1, t3
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmin.s s1, t1, t3
|
FMIN s1, t1, t3
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
addi.d I, I, -1
|
addi.d I, I, -1
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmin.s s3, t1, t3
|
FMIN s3, t1, t3
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmin.s s4, t1, t3
|
FMIN s4, t1, t3
|
||||||
blt $r0, I, .L21
|
blt $r0, I, .L21
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L22:
|
.L22:
|
||||||
fmin.s s1, s1, s2
|
FMIN s1, s1, s2
|
||||||
fmin.s s3, s3, s4
|
FMIN s3, s3, s4
|
||||||
fmin.s s1, s1, s3
|
FMIN s1, s1, s3
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L23: //N<8
|
.L23: //N<8
|
||||||
|
@ -187,12 +209,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
FABS a1, a1
|
FABS a1, a1
|
||||||
ADD a0, a0, a1
|
ADD a0, a0, a1
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fmin.s s1, a0, s1
|
FMIN s1, a0, s1
|
||||||
blt $r0, I, .L24
|
blt $r0, I, .L24
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
fmov.s $f0, $f22
|
MOV $f0, $f22
|
||||||
jirl $r0, $r1, 0x0
|
jirl $r0, $r1, 0x0
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
|
|
@ -61,61 +61,98 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
vxor.v res0, res0, res0
|
vxor.v res0, res0, res0
|
||||||
bge $r0, N, .L999
|
bge $r0, N, .L999
|
||||||
bge $r0, INCX, .L999
|
bge $r0, INCX, .L999
|
||||||
fld.s a0, X, 0 * SIZE
|
LD a0, X, 0 * SIZE
|
||||||
fld.s a1, X, 1 * SIZE
|
LD a1, X, 1 * SIZE
|
||||||
fabs.s a0, a0
|
FABS a0, a0
|
||||||
fabs.s a1, a1
|
FABS a1, a1
|
||||||
fadd.s s1, a1, a0
|
ADD s1, a1, a0
|
||||||
|
#ifdef DOUBLE
|
||||||
|
vreplvei.d VM0, VM0, 0
|
||||||
|
#else
|
||||||
vreplvei.w VM0, VM0, 0
|
vreplvei.w VM0, VM0, 0
|
||||||
|
#endif
|
||||||
li.d TEMP, 1
|
li.d TEMP, 1
|
||||||
li.w I, -1
|
|
||||||
slli.d TEMP, TEMP, ZBASE_SHIFT
|
slli.d TEMP, TEMP, ZBASE_SHIFT
|
||||||
slli.d INCX, INCX, ZBASE_SHIFT
|
slli.d INCX, INCX, ZBASE_SHIFT
|
||||||
vreplgr2vr.w neg1, I
|
|
||||||
vffint.s.w neg1, neg1
|
|
||||||
srai.d I, N, 3
|
srai.d I, N, 3
|
||||||
bne INCX, TEMP, .L20
|
bne INCX, TEMP, .L20
|
||||||
bge $r0, I, .L23
|
bge $r0, I, .L23
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L10:
|
.L10:
|
||||||
vld VX0, X, 0 * SIZE
|
vld VX0, X, 0
|
||||||
vld VX1, X, 4 * SIZE
|
vld VX1, X, 16
|
||||||
|
#ifdef DOUBLE
|
||||||
|
vpickev.d x1, VX1, VX0
|
||||||
|
vpickod.d x2, VX1, VX0
|
||||||
|
#else
|
||||||
|
vpickev.w x1, VX1, VX0
|
||||||
|
vpickod.w x2, VX1, VX0
|
||||||
|
#endif
|
||||||
|
VFSUB x3, res0, x1
|
||||||
|
VFSUB x4, res0, x2
|
||||||
|
VFMAX x1, x1, x3
|
||||||
|
VFMAX x2, x2, x4
|
||||||
|
VFADD VM1, x1, x2
|
||||||
|
|
||||||
|
vld VX0, X, 32
|
||||||
|
vld VX1, X, 48
|
||||||
|
#ifdef DOUBLE
|
||||||
|
vpickev.d x1, VX1, VX0
|
||||||
|
vpickod.d x2, VX1, VX0
|
||||||
|
#else
|
||||||
|
vpickev.w x1, VX1, VX0
|
||||||
|
vpickod.w x2, VX1, VX0
|
||||||
|
#endif
|
||||||
|
VFSUB x3, res0, x1
|
||||||
|
VFSUB x4, res0, x2
|
||||||
|
VFMAX x1, x1, x3
|
||||||
|
VFMAX x2, x2, x4
|
||||||
|
VFADD x1, x1, x2
|
||||||
|
VFMIN VM1, x1, VM1
|
||||||
|
VFMIN VM0, VM0, VM1
|
||||||
|
#ifdef DOUBLE
|
||||||
|
vld VX0, X, 64
|
||||||
|
vld VX1, X, 80
|
||||||
|
vpickev.d x1, VX1, VX0
|
||||||
|
vpickod.d x2, VX1, VX0
|
||||||
|
VFSUB x3, res0, x1
|
||||||
|
VFSUB x4, res0, x2
|
||||||
|
VFMAX x1, x1, x3
|
||||||
|
VFMAX x2, x2, x4
|
||||||
|
VFADD VM1, x1, x2
|
||||||
|
|
||||||
|
vld VX0, X, 96
|
||||||
|
vld VX1, X, 112
|
||||||
|
vpickev.d x1, VX1, VX0
|
||||||
|
vpickod.d x2, VX1, VX0
|
||||||
|
VFSUB x3, res0, x1
|
||||||
|
VFSUB x4, res0, x2
|
||||||
|
VFMAX x1, x1, x3
|
||||||
|
VFMAX x2, x2, x4
|
||||||
|
VFADD x1, x1, x2
|
||||||
|
VFMIN VM1, x1, VM1
|
||||||
|
VFMIN VM0, VM0, VM1
|
||||||
|
#endif
|
||||||
addi.d I, I, -1
|
addi.d I, I, -1
|
||||||
vpickev.w x1, VX1, VX0
|
|
||||||
vpickod.w x2, VX1, VX0
|
|
||||||
vfmul.s x3, neg1, x1
|
|
||||||
vfmul.s x4, neg1, x2
|
|
||||||
vfcmp.clt.s VT0, x1, res0
|
|
||||||
vfcmp.clt.s VT1, x2, res0
|
|
||||||
vld VX0, X, 8 * SIZE
|
|
||||||
vbitsel.v x1, x1, x3, VT0
|
|
||||||
vbitsel.v x2, x2, x4, VT1
|
|
||||||
vld VX1, X, 12 * SIZE
|
|
||||||
vfadd.s VM1, x1, x2
|
|
||||||
vpickev.w x1, VX1, VX0
|
|
||||||
vpickod.w x2, VX1, VX0
|
|
||||||
vfmul.s x3, neg1, x1
|
|
||||||
vfmul.s x4, neg1, x2
|
|
||||||
vfcmp.clt.s VT0, x1, res0
|
|
||||||
vfcmp.clt.s VT1, x2, res0
|
|
||||||
addi.d X, X, 16 * SIZE
|
addi.d X, X, 16 * SIZE
|
||||||
vbitsel.v x1, x1, x3, VT0
|
|
||||||
vbitsel.v x2, x2, x4, VT1
|
|
||||||
vfadd.s x1, x1, x2
|
|
||||||
vfmin.s VM1, x1, VM1
|
|
||||||
vfmin.s VM0, VM0, VM1
|
|
||||||
blt $r0, I, .L10
|
blt $r0, I, .L10
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L11:
|
.L11:
|
||||||
|
#ifdef DOUBLE
|
||||||
|
vreplvei.d x1, VM0, 0
|
||||||
|
vreplvei.d x2, VM0, 1
|
||||||
|
VFMIN VM0, x1, x2
|
||||||
|
#else
|
||||||
vreplvei.w x1, VM0, 0
|
vreplvei.w x1, VM0, 0
|
||||||
vreplvei.w x2, VM0, 1
|
vreplvei.w x2, VM0, 1
|
||||||
vreplvei.w x3, VM0, 2
|
vreplvei.w x3, VM0, 2
|
||||||
vreplvei.w x4, VM0, 3
|
vreplvei.w x4, VM0, 3
|
||||||
vfmin.s VM1, x1, x2
|
VFMIN VM1, x1, x2
|
||||||
vfmin.s VM0, x3, x4
|
VFMIN VM0, x3, x4
|
||||||
vfmin.s VM0, VM0, VM1
|
VFMIN VM0, VM0, VM1
|
||||||
|
#endif
|
||||||
b .L23
|
b .L23
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
@ -124,66 +161,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L21:
|
.L21:
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmin.s s1, t1, t3
|
FMIN s1, t1, t3
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmin.s s1, t1, t3
|
FMIN s1, t1, t3
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
addi.d I, I, -1
|
addi.d I, I, -1
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmin.s s3, t1, t3
|
FMIN s3, t1, t3
|
||||||
fld.s t1, X, 0 * SIZE
|
LD t1, X, 0 * SIZE
|
||||||
fld.s t2, X, 1 * SIZE
|
LD t2, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fld.s t3, X, 0 * SIZE
|
LD t3, X, 0 * SIZE
|
||||||
fld.s t4, X, 1 * SIZE
|
LD t4, X, 1 * SIZE
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fabs.s t1, t1
|
FABS t1, t1
|
||||||
fabs.s t2, t2
|
FABS t2, t2
|
||||||
fabs.s t3, t3
|
FABS t3, t3
|
||||||
fabs.s t4, t4
|
FABS t4, t4
|
||||||
fadd.s t1, t1, t2
|
ADD t1, t1, t2
|
||||||
fadd.s t3, t3, t4
|
ADD t3, t3, t4
|
||||||
fmin.s s4, t1, t3
|
FMIN s4, t1, t3
|
||||||
blt $r0, I, .L21
|
blt $r0, I, .L21
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L22:
|
.L22:
|
||||||
fmin.s s1, s1, s2
|
FMIN s1, s1, s2
|
||||||
fmin.s s3, s3, s4
|
FMIN s3, s3, s4
|
||||||
fmin.s s1, s1, s3
|
FMIN s1, s1, s3
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L23: //N<8
|
.L23: //N<8
|
||||||
|
@ -192,19 +229,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L24:
|
.L24:
|
||||||
fld.s a0, X, 0 * SIZE
|
LD a0, X, 0 * SIZE
|
||||||
fld.s a1, X, 1 * SIZE
|
LD a1, X, 1 * SIZE
|
||||||
addi.d I, I, -1
|
addi.d I, I, -1
|
||||||
fabs.s a0, a0
|
FABS a0, a0
|
||||||
fabs.s a1, a1
|
FABS a1, a1
|
||||||
fadd.s a0, a0, a1
|
ADD a0, a0, a1
|
||||||
add.d X, X, INCX
|
add.d X, X, INCX
|
||||||
fmin.s s1, a0, s1
|
FMIN s1, a0, s1
|
||||||
blt $r0, I, .L24
|
blt $r0, I, .L24
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L999:
|
.L999:
|
||||||
fmov.s $f0, $f22
|
MOV $f0, $f22
|
||||||
jirl $r0, $r1, 0x0
|
jirl $r0, $r1, 0x0
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
|
|
@ -99,7 +99,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L113 //alpha_r != 0.0 && alpha_i == 0.0
|
b .L113 //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
|
|
||||||
.L14:
|
.L14:
|
||||||
bceqz $fcc1, .L112 //alpha_r == 0.0 && alpha_i != 0.0
|
bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0
|
||||||
b .L111 //alpha_r == 0.0 && alpha_i == 0.0
|
b .L111 //alpha_r == 0.0 && alpha_i == 0.0
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
@ -117,38 +117,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L997
|
b .L997
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L112: //alpha_r == 0.0 && alpha_i != 0.0
|
|
||||||
xvld VX0, X, 0 * SIZE
|
|
||||||
#ifdef DOUBLE
|
|
||||||
xvld VX1, X, 4 * SIZE
|
|
||||||
xvpickev.d x1, VX1, VX0
|
|
||||||
xvpickod.d x2, VX1, VX0
|
|
||||||
xvfmul.d x3, VXAI, x2
|
|
||||||
xvfsub.d x3, VXZ, x3
|
|
||||||
xvfmul.d x4, VXAI, x1
|
|
||||||
xvilvl.d VX2, x4 ,x3
|
|
||||||
xvilvh.d VX3, x4, x3
|
|
||||||
xvst VX2, X, 0 * SIZE
|
|
||||||
xvst VX3, X, 4 * SIZE
|
|
||||||
addi.d X, X, 8 * SIZE
|
|
||||||
#else
|
|
||||||
xvld VX1, X, 8 * SIZE
|
|
||||||
xvpickev.w x1, VX1, VX0
|
|
||||||
xvpickod.w x2, VX1, VX0
|
|
||||||
xvfmul.s x3, VXAI, x2
|
|
||||||
xvfsub.s x3, VXZ, x3
|
|
||||||
xvfmul.s x4, VXAI, x1
|
|
||||||
xvilvl.w VX2, x4 ,x3
|
|
||||||
xvilvh.w VX3, x4, x3
|
|
||||||
xvst VX2, X, 0 * SIZE
|
|
||||||
xvst VX3, X, 8 * SIZE
|
|
||||||
addi.d X, X, 16 * SIZE
|
|
||||||
#endif
|
|
||||||
addi.d I, I, -1
|
|
||||||
blt $r0, I, .L112
|
|
||||||
b .L997
|
|
||||||
.align 3
|
|
||||||
|
|
||||||
.L113: //alpha_r != 0.0 && alpha_i == 0.0
|
.L113: //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
xvld VX0, X, 0 * SIZE
|
xvld VX0, X, 0 * SIZE
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
|
@ -227,7 +195,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L223 //alpha_r != 0.0 && alpha_i == 0.0
|
b .L223 //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
|
|
||||||
.L24:
|
.L24:
|
||||||
bceqz $fcc1, .L222 //alpha_r == 0.0 && alpha_i != 0.0
|
bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0
|
||||||
b .L221 //alpha_r == 0.0 && alpha_i == 0.0
|
b .L221 //alpha_r == 0.0 && alpha_i == 0.0
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
@ -275,119 +243,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L997
|
b .L997
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L222: //alpha_r == 0.0 && alpha_i != 0.0
|
|
||||||
#ifdef DOUBLE
|
|
||||||
ld.d t1, X, 0 * SIZE
|
|
||||||
ld.d t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.d t3, X, 0 * SIZE
|
|
||||||
ld.d t4, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
xvinsgr2vr.d x1, t1, 0
|
|
||||||
xvinsgr2vr.d x2, t2, 0
|
|
||||||
xvinsgr2vr.d x1, t3, 1
|
|
||||||
xvinsgr2vr.d x2, t4, 1
|
|
||||||
ld.d t1, X, 0 * SIZE
|
|
||||||
ld.d t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.d t3, X, 0 * SIZE
|
|
||||||
ld.d t4, X, 1 * SIZE
|
|
||||||
xvinsgr2vr.d x1, t1, 2
|
|
||||||
xvinsgr2vr.d x2, t2, 2
|
|
||||||
xvinsgr2vr.d x1, t3, 3
|
|
||||||
xvinsgr2vr.d x2, t4, 3
|
|
||||||
add.d X, X, INCX
|
|
||||||
|
|
||||||
xvfmul.d x3, VXAI, x2
|
|
||||||
xvfsub.d x3, VXZ, x3
|
|
||||||
xvfmul.d x4, VXAI, x1
|
|
||||||
addi.d I, I, -1
|
|
||||||
xvstelm.d x3, XX, 0 * SIZE, 0
|
|
||||||
xvstelm.d x4, XX, 1 * SIZE, 0
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.d x3, XX, 0 * SIZE, 1
|
|
||||||
xvstelm.d x4, XX, 1 * SIZE, 1
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.d x3, XX, 0 * SIZE, 2
|
|
||||||
xvstelm.d x4, XX, 1 * SIZE, 2
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.d x3, XX, 0 * SIZE, 3
|
|
||||||
xvstelm.d x4, XX, 1 * SIZE, 3
|
|
||||||
#else
|
|
||||||
ld.w t1, X, 0 * SIZE
|
|
||||||
ld.w t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t3, X, 0 * SIZE
|
|
||||||
ld.w t4, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
xvinsgr2vr.w x1, t1, 0
|
|
||||||
xvinsgr2vr.w x2, t2, 0
|
|
||||||
xvinsgr2vr.w x1, t3, 1
|
|
||||||
xvinsgr2vr.w x2, t4, 1
|
|
||||||
ld.w t1, X, 0 * SIZE
|
|
||||||
ld.w t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t3, X, 0 * SIZE
|
|
||||||
ld.w t4, X, 1 * SIZE
|
|
||||||
xvinsgr2vr.w x1, t1, 2
|
|
||||||
xvinsgr2vr.w x2, t2, 2
|
|
||||||
xvinsgr2vr.w x1, t3, 3
|
|
||||||
xvinsgr2vr.w x2, t4, 3
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t1, X, 0 * SIZE
|
|
||||||
ld.w t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t3, X, 0 * SIZE
|
|
||||||
ld.w t4, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
xvinsgr2vr.w x1, t1, 4
|
|
||||||
xvinsgr2vr.w x2, t2, 4
|
|
||||||
xvinsgr2vr.w x1, t3, 5
|
|
||||||
xvinsgr2vr.w x2, t4, 5
|
|
||||||
ld.w t1, X, 0 * SIZE
|
|
||||||
ld.w t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t3, X, 0 * SIZE
|
|
||||||
ld.w t4, X, 1 * SIZE
|
|
||||||
xvinsgr2vr.w x1, t1, 6
|
|
||||||
xvinsgr2vr.w x2, t2, 6
|
|
||||||
xvinsgr2vr.w x1, t3, 7
|
|
||||||
xvinsgr2vr.w x2, t4, 7
|
|
||||||
add.d X, X, INCX
|
|
||||||
|
|
||||||
xvfmul.s x3, VXAI, x2
|
|
||||||
xvfsub.s x3, VXZ, x3
|
|
||||||
xvfmul.s x4, VXAI, x1
|
|
||||||
addi.d I, I, -1
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 0
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 0
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 1
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 1
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 2
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 2
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 3
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 3
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 4
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 4
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 5
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 5
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 6
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 6
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
xvstelm.w x3, XX, 0 * SIZE, 7
|
|
||||||
xvstelm.w x4, XX, 1 * SIZE, 7
|
|
||||||
#endif
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
blt $r0, I, .L222
|
|
||||||
b .L997
|
|
||||||
.align 3
|
|
||||||
|
|
||||||
.L223: //alpha_r != 0.0 && alpha_i == 0.0
|
.L223: //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
ld.d t1, X, 0 * SIZE
|
ld.d t1, X, 0 * SIZE
|
||||||
|
|
|
@ -97,7 +97,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L113 //alpha_r != 0.0 && alpha_i == 0.0
|
b .L113 //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
|
|
||||||
.L14:
|
.L14:
|
||||||
bceqz $fcc1, .L112 //alpha_r == 0.0 && alpha_i != 0.0
|
bceqz $fcc1, .L114 //alpha_r == 0.0 && alpha_i != 0.0
|
||||||
b .L111 //alpha_r == 0.0 && alpha_i == 0.0
|
b .L111 //alpha_r == 0.0 && alpha_i == 0.0
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
@ -116,48 +116,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L997
|
b .L997
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L112: //alpha_r == 0.0 && alpha_i != 0.0
|
|
||||||
vld VX0, X, 0 * SIZE
|
|
||||||
#ifdef DOUBLE
|
|
||||||
vld VX1, X, 2 * SIZE
|
|
||||||
vpickev.d x1, VX1, VX0
|
|
||||||
vpickod.d x2, VX1, VX0
|
|
||||||
vfmul.d x3, VXAI, x2
|
|
||||||
vfsub.d x3, VXZ, x3
|
|
||||||
vfmul.d x4, VXAI, x1
|
|
||||||
vilvl.d VX2, x4 ,x3
|
|
||||||
vilvh.d VX3, x4, x3
|
|
||||||
vst VX2, X, 0 * SIZE
|
|
||||||
vst VX3, X, 2 * SIZE
|
|
||||||
vld VX0, X, 4 * SIZE
|
|
||||||
vld VX1, X, 6 * SIZE
|
|
||||||
vpickev.d x1, VX1, VX0
|
|
||||||
vpickod.d x2, VX1, VX0
|
|
||||||
vfmul.d x3, VXAI, x2
|
|
||||||
vfsub.d x3, VXZ, x3
|
|
||||||
vfmul.d x4, VXAI, x1
|
|
||||||
vilvl.d VX2, x4 ,x3
|
|
||||||
vilvh.d VX3, x4, x3
|
|
||||||
vst VX2, X, 4 * SIZE
|
|
||||||
vst VX3, X, 6 * SIZE
|
|
||||||
#else
|
|
||||||
vld VX1, X, 4 * SIZE
|
|
||||||
vpickev.w x1, VX1, VX0
|
|
||||||
vpickod.w x2, VX1, VX0
|
|
||||||
vfmul.s x3, VXAI, x2
|
|
||||||
vfsub.s x3, VXZ, x3
|
|
||||||
vfmul.s x4, VXAI, x1
|
|
||||||
vilvl.w VX2, x4 ,x3
|
|
||||||
vilvh.w VX3, x4, x3
|
|
||||||
vst VX2, X, 0 * SIZE
|
|
||||||
vst VX3, X, 4 * SIZE
|
|
||||||
#endif
|
|
||||||
addi.d X, X, 8 * SIZE
|
|
||||||
addi.d I, I, -1
|
|
||||||
blt $r0, I, .L112
|
|
||||||
b .L997
|
|
||||||
.align 3
|
|
||||||
|
|
||||||
.L113: //alpha_r != 0.0 && alpha_i == 0.0
|
.L113: //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
vld VX0, X, 0 * SIZE
|
vld VX0, X, 0 * SIZE
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
|
@ -256,7 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L223 //alpha_r != 0.0 && alpha_i == 0.0
|
b .L223 //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
|
|
||||||
.L24:
|
.L24:
|
||||||
bceqz $fcc1, .L222 //alpha_r == 0.0 && alpha_i != 0.0
|
bceqz $fcc1, .L224 //alpha_r == 0.0 && alpha_i != 0.0
|
||||||
b .L221 //alpha_r == 0.0 && alpha_i == 0.0
|
b .L221 //alpha_r == 0.0 && alpha_i == 0.0
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
|
@ -292,90 +250,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
b .L997
|
b .L997
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
.L222: //alpha_r == 0.0 && alpha_i != 0.0
|
|
||||||
#ifdef DOUBLE
|
|
||||||
ld.d t1, X, 0 * SIZE
|
|
||||||
ld.d t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.d t3, X, 0 * SIZE
|
|
||||||
ld.d t4, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
vinsgr2vr.d x1, t1, 0
|
|
||||||
vinsgr2vr.d x2, t2, 0
|
|
||||||
vinsgr2vr.d x1, t3, 1
|
|
||||||
vinsgr2vr.d x2, t4, 1
|
|
||||||
vfmul.d x3, VXAI, x2
|
|
||||||
vfsub.d x3, VXZ, x3
|
|
||||||
vfmul.d x4, VXAI, x1
|
|
||||||
vstelm.d x3, XX, 0 * SIZE, 0
|
|
||||||
vstelm.d x4, XX, 1 * SIZE, 0
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
vstelm.d x3, XX, 0 * SIZE, 1
|
|
||||||
vstelm.d x4, XX, 1 * SIZE, 1
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
|
|
||||||
ld.d t1, X, 0 * SIZE
|
|
||||||
ld.d t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.d t3, X, 0 * SIZE
|
|
||||||
ld.d t4, X, 1 * SIZE
|
|
||||||
vinsgr2vr.d x1, t1, 0
|
|
||||||
vinsgr2vr.d x2, t2, 0
|
|
||||||
vinsgr2vr.d x1, t3, 1
|
|
||||||
vinsgr2vr.d x2, t4, 1
|
|
||||||
add.d X, X, INCX
|
|
||||||
vfmul.d x3, VXAI, x2
|
|
||||||
vfsub.d x3, VXZ, x3
|
|
||||||
vfmul.d x4, VXAI, x1
|
|
||||||
addi.d I, I, -1
|
|
||||||
vstelm.d x3, XX, 0 * SIZE, 0
|
|
||||||
vstelm.d x4, XX, 1 * SIZE, 0
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
vstelm.d x3, XX, 0 * SIZE, 1
|
|
||||||
vstelm.d x4, XX, 1 * SIZE, 1
|
|
||||||
#else
|
|
||||||
ld.w t1, X, 0 * SIZE
|
|
||||||
ld.w t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t3, X, 0 * SIZE
|
|
||||||
ld.w t4, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
vinsgr2vr.w x1, t1, 0
|
|
||||||
vinsgr2vr.w x2, t2, 0
|
|
||||||
vinsgr2vr.w x1, t3, 1
|
|
||||||
vinsgr2vr.w x2, t4, 1
|
|
||||||
ld.w t1, X, 0 * SIZE
|
|
||||||
ld.w t2, X, 1 * SIZE
|
|
||||||
add.d X, X, INCX
|
|
||||||
ld.w t3, X, 0 * SIZE
|
|
||||||
ld.w t4, X, 1 * SIZE
|
|
||||||
vinsgr2vr.w x1, t1, 2
|
|
||||||
vinsgr2vr.w x2, t2, 2
|
|
||||||
vinsgr2vr.w x1, t3, 3
|
|
||||||
vinsgr2vr.w x2, t4, 3
|
|
||||||
add.d X, X, INCX
|
|
||||||
|
|
||||||
vfmul.s x3, VXAI, x2
|
|
||||||
vfsub.s x3, VXZ, x3
|
|
||||||
vfmul.s x4, VXAI, x1
|
|
||||||
addi.d I, I, -1
|
|
||||||
vstelm.w x3, XX, 0 * SIZE, 0
|
|
||||||
vstelm.w x4, XX, 1 * SIZE, 0
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
vstelm.w x3, XX, 0 * SIZE, 1
|
|
||||||
vstelm.w x4, XX, 1 * SIZE, 1
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
vstelm.w x3, XX, 0 * SIZE, 2
|
|
||||||
vstelm.w x4, XX, 1 * SIZE, 2
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
vstelm.w x3, XX, 0 * SIZE, 3
|
|
||||||
vstelm.w x4, XX, 1 * SIZE, 3
|
|
||||||
#endif
|
|
||||||
add.d XX, XX, INCX
|
|
||||||
blt $r0, I, .L222
|
|
||||||
b .L997
|
|
||||||
.align 3
|
|
||||||
|
|
||||||
.L223: //alpha_r != 0.0 && alpha_i == 0.0
|
.L223: //alpha_r != 0.0 && alpha_i == 0.0
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
ld.d t1, X, 0 * SIZE
|
ld.d t1, X, 0 * SIZE
|
||||||
|
|
|
@ -69,16 +69,16 @@ static void zscal_kernel_8( BLASLONG n, FLOAT *alpha , FLOAT *x )
|
||||||
|
|
||||||
for( i=0; i<n; i+=4 )
|
for( i=0; i<n; i+=4 )
|
||||||
{
|
{
|
||||||
t0 = da_r *x[0] - da_i *x[1];
|
t0 = da_r *x[0] - da_i *x[1];
|
||||||
t1 = da_r *x[2] - da_i *x[3];
|
t1 = da_r *x[2] - da_i *x[3];
|
||||||
t2 = da_r *x[4] - da_i *x[5];
|
t2 = da_r *x[4] - da_i *x[5];
|
||||||
t3 = da_r *x[6] - da_i *x[7];
|
t3 = da_r *x[6] - da_i *x[7];
|
||||||
|
|
||||||
x[1] = da_r * x[1] + da_i * x[0];
|
x[1] = da_r * x[1] + da_i * x[0];
|
||||||
x[3] = da_r * x[3] + da_i * x[2];
|
x[3] = da_r * x[3] + da_i * x[2];
|
||||||
x[5] = da_r * x[5] + da_i * x[4];
|
x[5] = da_r * x[5] + da_i * x[4];
|
||||||
x[7] = da_r * x[7] + da_i * x[6];
|
x[7] = da_r * x[7] + da_i * x[6];
|
||||||
|
|
||||||
x[0] = t0;
|
x[0] = t0;
|
||||||
x[2] = t1;
|
x[2] = t1;
|
||||||
x[4] = t2;
|
x[4] = t2;
|
||||||
|
@ -99,16 +99,16 @@ static void zscal_kernel_8_zero_r( BLASLONG n, FLOAT *alpha , FLOAT *x )
|
||||||
|
|
||||||
for( i=0; i<n; i+=4 )
|
for( i=0; i<n; i+=4 )
|
||||||
{
|
{
|
||||||
t0 = - da_i *x[1];
|
t0 = - da_i *x[1];
|
||||||
t1 = - da_i *x[3];
|
t1 = - da_i *x[3];
|
||||||
t2 = - da_i *x[5];
|
t2 = - da_i *x[5];
|
||||||
t3 = - da_i *x[7];
|
t3 = - da_i *x[7];
|
||||||
|
|
||||||
x[1] = da_i * x[0];
|
x[1] = da_i * x[0];
|
||||||
x[3] = da_i * x[2];
|
x[3] = da_i * x[2];
|
||||||
x[5] = da_i * x[4];
|
x[5] = da_i * x[4];
|
||||||
x[7] = da_i * x[6];
|
x[7] = da_i * x[6];
|
||||||
|
|
||||||
x[0] = t0;
|
x[0] = t0;
|
||||||
x[2] = t1;
|
x[2] = t1;
|
||||||
x[4] = t2;
|
x[4] = t2;
|
||||||
|
@ -129,16 +129,16 @@ static void zscal_kernel_8_zero_i( BLASLONG n, FLOAT *alpha , FLOAT *x )
|
||||||
|
|
||||||
for( i=0; i<n; i+=4 )
|
for( i=0; i<n; i+=4 )
|
||||||
{
|
{
|
||||||
t0 = da_r *x[0];
|
t0 = da_r *x[0];
|
||||||
t1 = da_r *x[2];
|
t1 = da_r *x[2];
|
||||||
t2 = da_r *x[4];
|
t2 = da_r *x[4];
|
||||||
t3 = da_r *x[6];
|
t3 = da_r *x[6];
|
||||||
|
|
||||||
x[1] = da_r * x[1];
|
x[1] = da_r * x[1];
|
||||||
x[3] = da_r * x[3];
|
x[3] = da_r * x[3];
|
||||||
x[5] = da_r * x[5];
|
x[5] = da_r * x[5];
|
||||||
x[7] = da_r * x[7];
|
x[7] = da_r * x[7];
|
||||||
|
|
||||||
x[0] = t0;
|
x[0] = t0;
|
||||||
x[2] = t1;
|
x[2] = t1;
|
||||||
x[4] = t2;
|
x[4] = t2;
|
||||||
|
@ -157,14 +157,14 @@ static void zscal_kernel_8_zero( BLASLONG n, FLOAT *alpha , FLOAT *x )
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
for( i=0; i<n; i+=4 )
|
for( i=0; i<n; i+=4 )
|
||||||
{
|
{
|
||||||
x[0] = 0.0;
|
x[0] = 0.0;
|
||||||
x[1] = 0.0;
|
x[1] = 0.0;
|
||||||
x[2] = 0.0;
|
x[2] = 0.0;
|
||||||
x[3] = 0.0;
|
x[3] = 0.0;
|
||||||
x[4] = 0.0;
|
x[4] = 0.0;
|
||||||
x[5] = 0.0;
|
x[5] = 0.0;
|
||||||
x[6] = 0.0;
|
x[6] = 0.0;
|
||||||
x[7] = 0.0;
|
x[7] = 0.0;
|
||||||
x+=8;
|
x+=8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -186,10 +186,10 @@ static void zscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_
|
||||||
|
|
||||||
for ( i=0; i<n; i+=4 )
|
for ( i=0; i<n; i+=4 )
|
||||||
{
|
{
|
||||||
t0 = da_r * x[0] - da_i *x[1];
|
t0 = da_r * x[0] - da_i *x[1];
|
||||||
t1 = da_r * x[inc_x] - da_i *x[inc_x + 1];
|
t1 = da_r * x[inc_x] - da_i *x[inc_x + 1];
|
||||||
t2 = da_r * x[inc_x2] - da_i *x[inc_x2 + 1];
|
t2 = da_r * x[inc_x2] - da_i *x[inc_x2 + 1];
|
||||||
t3 = da_r * x[inc_x3] - da_i *x[inc_x3 + 1];
|
t3 = da_r * x[inc_x3] - da_i *x[inc_x3 + 1];
|
||||||
|
|
||||||
x[1] = da_i * x[0] + da_r * x[1];
|
x[1] = da_i * x[0] + da_r * x[1];
|
||||||
x[inc_x +1] = da_i * x[inc_x] + da_r * x[inc_x +1];
|
x[inc_x +1] = da_i * x[inc_x] + da_r * x[inc_x +1];
|
||||||
|
@ -228,7 +228,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
{
|
{
|
||||||
while(j < n1)
|
while(j < n1)
|
||||||
{
|
{
|
||||||
|
|
||||||
x[i]=0.0;
|
x[i]=0.0;
|
||||||
x[i+1]=0.0;
|
x[i+1]=0.0;
|
||||||
x[i+inc_x]=0.0;
|
x[i+inc_x]=0.0;
|
||||||
|
@ -240,7 +240,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
|
|
||||||
while(j < n)
|
while(j < n)
|
||||||
{
|
{
|
||||||
|
|
||||||
x[i]=0.0;
|
x[i]=0.0;
|
||||||
x[i+1]=0.0;
|
x[i+1]=0.0;
|
||||||
i += inc_x ;
|
i += inc_x ;
|
||||||
|
@ -253,11 +253,17 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
{
|
{
|
||||||
while(j < n1)
|
while(j < n1)
|
||||||
{
|
{
|
||||||
|
|
||||||
temp0 = -da_i * x[i+1];
|
if (isnan(x[i]) || isinf(x[i]))
|
||||||
|
temp0 = NAN;
|
||||||
|
else
|
||||||
|
temp0 = -da_i * x[i+1];
|
||||||
x[i+1] = da_i * x[i];
|
x[i+1] = da_i * x[i];
|
||||||
x[i] = temp0;
|
x[i] = temp0;
|
||||||
temp1 = -da_i * x[i+1+inc_x];
|
if (isnan(x[i+inc_x]) || isinf(x[i+inc_x]))
|
||||||
|
temp1 = NAN;
|
||||||
|
else
|
||||||
|
temp1 = -da_i * x[i+1+inc_x];
|
||||||
x[i+1+inc_x] = da_i * x[i+inc_x];
|
x[i+1+inc_x] = da_i * x[i+inc_x];
|
||||||
x[i+inc_x] = temp1;
|
x[i+inc_x] = temp1;
|
||||||
i += 2*inc_x ;
|
i += 2*inc_x ;
|
||||||
|
@ -267,8 +273,11 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
|
|
||||||
while(j < n)
|
while(j < n)
|
||||||
{
|
{
|
||||||
|
|
||||||
temp0 = -da_i * x[i+1];
|
if (isnan(x[i]) || isinf(x[i]))
|
||||||
|
temp0 = NAN;
|
||||||
|
else
|
||||||
|
temp0 = -da_i * x[i+1];
|
||||||
x[i+1] = da_i * x[i];
|
x[i+1] = da_i * x[i];
|
||||||
x[i] = temp0;
|
x[i] = temp0;
|
||||||
i += inc_x ;
|
i += inc_x ;
|
||||||
|
@ -291,7 +300,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
|
|
||||||
while(j < n1)
|
while(j < n1)
|
||||||
{
|
{
|
||||||
|
|
||||||
temp0 = da_r * x[i];
|
temp0 = da_r * x[i];
|
||||||
x[i+1] = da_r * x[i+1];
|
x[i+1] = da_r * x[i+1];
|
||||||
x[i] = temp0;
|
x[i] = temp0;
|
||||||
|
@ -305,7 +314,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
|
|
||||||
while(j < n)
|
while(j < n)
|
||||||
{
|
{
|
||||||
|
|
||||||
temp0 = da_r * x[i];
|
temp0 = da_r * x[i];
|
||||||
x[i+1] = da_r * x[i+1];
|
x[i+1] = da_r * x[i+1];
|
||||||
x[i] = temp0;
|
x[i] = temp0;
|
||||||
|
@ -368,7 +377,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
}
|
}
|
||||||
i = n1 << 1;
|
i = n1 << 1;
|
||||||
j = n1;
|
j = n1;
|
||||||
|
|
||||||
if ( da_r == 0.0 || da_r != da_r )
|
if ( da_r == 0.0 || da_r != da_r )
|
||||||
{
|
{
|
||||||
if ( da_i == 0.0 )
|
if ( da_i == 0.0 )
|
||||||
|
@ -385,7 +394,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
else if (da_r < -FLT_MAX || da_r > FLT_MAX) {
|
else if (da_r < -FLT_MAX || da_r > FLT_MAX) {
|
||||||
while(j < n)
|
while(j < n)
|
||||||
{
|
{
|
||||||
x[i]= NAN;
|
x[i]= NAN;
|
||||||
|
@ -404,7 +413,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
if (x[i] < -FLT_MAX || x[i] > FLT_MAX)
|
if (x[i] < -FLT_MAX || x[i] > FLT_MAX)
|
||||||
temp0 = NAN;
|
temp0 = NAN;
|
||||||
x[i+1] = da_i * x[i];
|
x[i+1] = da_i * x[i];
|
||||||
if ( x[i] == x[i]) //preserve NaN
|
if ( x[i] == x[i]) //preserve NaN
|
||||||
x[i] = temp0;
|
x[i] = temp0;
|
||||||
i += 2 ;
|
i += 2 ;
|
||||||
j++;
|
j++;
|
||||||
|
@ -420,7 +429,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
{
|
{
|
||||||
while(j < n)
|
while(j < n)
|
||||||
{
|
{
|
||||||
|
|
||||||
temp0 = da_r * x[i];
|
temp0 = da_r * x[i];
|
||||||
x[i+1] = da_r * x[i+1];
|
x[i+1] = da_r * x[i+1];
|
||||||
x[i] = temp0;
|
x[i] = temp0;
|
||||||
|
@ -442,7 +451,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -16,6 +16,7 @@ else ()
|
||||||
test_dnrm2.c
|
test_dnrm2.c
|
||||||
test_swap.c
|
test_swap.c
|
||||||
test_zscal.c
|
test_zscal.c
|
||||||
|
test_amin.c
|
||||||
)
|
)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,8 @@ UTESTBIN=openblas_utest
|
||||||
|
|
||||||
include $(TOPDIR)/Makefile.system
|
include $(TOPDIR)/Makefile.system
|
||||||
|
|
||||||
OBJS=utest_main.o test_min.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o test_dnrm2.o test_zscal.o
|
OBJS=utest_main.o test_min.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o test_dnrm2.o test_zscal.o \
|
||||||
|
test_amin.o
|
||||||
#test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o
|
#test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o
|
||||||
|
|
||||||
ifneq ($(NO_LAPACK), 1)
|
ifneq ($(NO_LAPACK), 1)
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*****************************************************************************
|
/*****************************************************************************
|
||||||
Copyright (c) 2011-2016, The OpenBLAS Project
|
Copyright (c) 2011-2024, The OpenBLAS Project
|
||||||
All rights reserved.
|
All rights reserved.
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
@ -13,9 +13,9 @@ met:
|
||||||
notice, this list of conditions and the following disclaimer in
|
notice, this list of conditions and the following disclaimer in
|
||||||
the documentation and/or other materials provided with the
|
the documentation and/or other materials provided with the
|
||||||
distribution.
|
distribution.
|
||||||
3. Neither the name of the OpenBLAS project nor the names of
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
its contributors may be used to endorse or promote products
|
its contributors may be used to endorse or promote products
|
||||||
derived from this software without specific prior written
|
derived from this software without specific prior written
|
||||||
permission.
|
permission.
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
@ -57,4 +57,31 @@ CTEST(amax, damax){
|
||||||
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), DOUBLE_EPS);
|
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), DOUBLE_EPS);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef BUILD_COMPLEX
|
||||||
|
CTEST(amax, scamax){
|
||||||
|
blasint N = 9, inc = 1;
|
||||||
|
float te_max = 0.0, tr_max = 0.0;
|
||||||
|
float x[] = { -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, -7.7, 8.8,
|
||||||
|
-9.9, 10.10, -1.1, 2.2, -3.3, 4.4, -5.5, 6.6,
|
||||||
|
-7.7, 8.8 };
|
||||||
|
|
||||||
|
te_max = BLASFUNC(scamax)(&N, x, &inc);
|
||||||
|
tr_max = 20.0;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef BUILD_COMPLEX16
|
||||||
|
CTEST(amax, dzamax){
|
||||||
|
blasint N = 9, inc = 1;
|
||||||
|
double te_max = 0.0, tr_max = 0.0;
|
||||||
|
double x[] = { -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, -7.7, 8.8,
|
||||||
|
-9.9, 10.10, -1.1, 2.2, -3.3, 4.4, -5.5, 6.6,
|
||||||
|
-7.7, 8.8 };
|
||||||
|
|
||||||
|
te_max = BLASFUNC(dzamax)(&N, x, &inc);
|
||||||
|
tr_max = 20.0;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), DOUBLE_EPS);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
|
@ -0,0 +1,89 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011-2024, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written
|
||||||
|
permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#include "openblas_utest.h"
|
||||||
|
|
||||||
|
#ifdef BUILD_SINGLE
|
||||||
|
CTEST(amin, samin){
|
||||||
|
blasint N = 3, inc = 1;
|
||||||
|
float te_min = 0.0, tr_min = 0.0;
|
||||||
|
float x[] = { -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, -7.7, 8.8,
|
||||||
|
-9.9 };
|
||||||
|
|
||||||
|
te_min = BLASFUNC(samin)(&N, x, &inc);
|
||||||
|
tr_min = 1.1;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), SINGLE_EPS);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef BUILD_DOUBLE
|
||||||
|
CTEST(amin, damin){
|
||||||
|
blasint N = 3, inc = 1;
|
||||||
|
double te_min = 0.0, tr_min = 0.0;
|
||||||
|
double x[] = { -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, -7.7, 8.8,
|
||||||
|
-9.9 };
|
||||||
|
|
||||||
|
te_min = BLASFUNC(damin)(&N, x, &inc);
|
||||||
|
tr_min = 1.1;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), DOUBLE_EPS);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef BUILD_COMPLEX
|
||||||
|
CTEST(amin, scamin){
|
||||||
|
blasint N = 9, inc = 1;
|
||||||
|
float te_min = 0.0, tr_min = 0.0;
|
||||||
|
float x[] = { -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, -7.7, 8.8,
|
||||||
|
-9.9, 10.10, -1.1, 2.2, -3.3, 4.4, -5.5, 6.6,
|
||||||
|
-7.7, 8.8 };
|
||||||
|
|
||||||
|
te_min = BLASFUNC(scamin)(&N, x, &inc);
|
||||||
|
tr_min = 3.3;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), SINGLE_EPS);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifdef BUILD_COMPLEX16
|
||||||
|
CTEST(amin, dzamin){
|
||||||
|
blasint N = 9, inc = 1;
|
||||||
|
double te_min = 0.0, tr_min = 0.0;
|
||||||
|
double x[] = { -1.1, 2.2, -3.3, 4.4, -5.5, 6.6, -7.7, 8.8,
|
||||||
|
-9.9, 10.10, -1.1, 2.2, -3.3, 4.4, -5.5, 6.6,
|
||||||
|
-7.7, 8.8 };
|
||||||
|
|
||||||
|
te_min = BLASFUNC(dzamin)(&N, x, &inc);
|
||||||
|
tr_min = 3.3;
|
||||||
|
|
||||||
|
ASSERT_DBL_NEAR_TOL((double)(tr_min), (double)(te_min), DOUBLE_EPS);
|
||||||
|
}
|
||||||
|
#endif
|
|
@ -20,6 +20,18 @@ CTEST(zscal, i_nan)
|
||||||
ASSERT_TRUE(isnan(nan[17]));
|
ASSERT_TRUE(isnan(nan[17]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CTEST(zscal, i_nan_inc_2)
{
  /* Passed as alpha; its leading (re, im) pair is (0, 1). */
  double alpha[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 };
  /* Vector to be scaled: twenty (NaN, 0) pairs, accessed with incx = 2. */
  double x[] = {NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0,
                NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0};

  cblas_zscal(9, alpha, x, 2);

  /* Both parts of the checked entries are expected to come back as NaN. */
  ASSERT_TRUE(isnan(x[0]));
  ASSERT_TRUE(isnan(x[1]));
  ASSERT_TRUE(isnan(x[16]));
  ASSERT_TRUE(isnan(x[17]));
}
|
||||||
|
|
||||||
CTEST(zscal, nan_i)
|
CTEST(zscal, nan_i)
|
||||||
{
|
{
|
||||||
double i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 };
|
double i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 };
|
||||||
|
@ -30,7 +42,19 @@ CTEST(zscal, nan_i)
|
||||||
ASSERT_TRUE(isnan(i[16]));
|
ASSERT_TRUE(isnan(i[16]));
|
||||||
ASSERT_TRUE(isnan(i[17]));
|
ASSERT_TRUE(isnan(i[17]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CTEST(zscal, nan_i_inc_2)
{
  /* Passed as alpha; its leading (re, im) pair is (NaN, 0). */
  double alpha[] = {NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0, NAN,0};
  /* Vector to be scaled: eighteen (0, 1) pairs, accessed with incx = 2. */
  double x[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1,
                0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 };

  cblas_zscal(9, alpha, x, 2);

  /* Both parts of the checked entries are expected to come back as NaN. */
  ASSERT_TRUE(isnan(x[0]));
  ASSERT_TRUE(isnan(x[1]));
  ASSERT_TRUE(isnan(x[16]));
  ASSERT_TRUE(isnan(x[17]));
}
|
||||||
|
|
||||||
CTEST(zscal, i_inf)
|
CTEST(zscal, i_inf)
|
||||||
{
|
{
|
||||||
double i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 };
|
double i[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 };
|
||||||
|
@ -40,7 +64,19 @@ CTEST(zscal, i_inf)
|
||||||
ASSERT_TRUE(isinf(inf[1]));
|
ASSERT_TRUE(isinf(inf[1]));
|
||||||
ASSERT_TRUE(isnan(inf[16]));
|
ASSERT_TRUE(isnan(inf[16]));
|
||||||
ASSERT_TRUE(isinf(inf[17]));
|
ASSERT_TRUE(isinf(inf[17]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CTEST(zscal, i_inf_inc_2)
{
  /* Passed as alpha; its leading (re, im) pair is (0, 1). */
  double alpha[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 };
  /* Vector to be scaled: eighteen (Inf, 0) pairs, accessed with incx = 2. */
  double x[] = {INFINITY, 0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0,
                INFINITY, 0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0};

  cblas_zscal(9, alpha, x, 2);

  /* (0 + 1i) * (Inf + 0i): real part 0*Inf - 1*0 is NaN, imaginary part 0*0 + 1*Inf is Inf. */
  ASSERT_TRUE(isnan(x[0]));
  ASSERT_TRUE(isinf(x[1]));
  ASSERT_TRUE(isnan(x[16]));
  ASSERT_TRUE(isinf(x[17]));
}
|
||||||
|
|
||||||
CTEST(zscal, inf_i)
|
CTEST(zscal, inf_i)
|
||||||
{
|
{
|
||||||
|
@ -53,4 +89,16 @@ CTEST(zscal, inf_i)
|
||||||
ASSERT_TRUE(isinf(i[17]));
|
ASSERT_TRUE(isinf(i[17]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CTEST(zscal, inf_i_inc_2)
{
  /* Passed as alpha; its leading (re, im) pair is (Inf, 0). */
  double alpha[] = {INFINITY, 0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0, INFINITY,0};
  /* Vector to be scaled: eighteen (0, 1) pairs, accessed with incx = 2. */
  double x[] = {0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1,
                0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1, 0,1 };

  cblas_zscal(9, alpha, x, 2);

  /* (Inf + 0i) * (0 + 1i): real part Inf*0 - 0*1 is NaN, imaginary part Inf*1 + 0*0 is Inf. */
  ASSERT_TRUE(isnan(x[0]));
  ASSERT_TRUE(isinf(x[1]));
  ASSERT_TRUE(isnan(x[16]));
  ASSERT_TRUE(isinf(x[17]));
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue