commit
403cde104e
|
@ -1,7 +1,7 @@
|
||||||
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
|
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
|
||||||
ifeq ($(OSNAME), Android)
|
ifeq ($(OSNAME), Android)
|
||||||
CCOMMON_OPT += -mfpu=neon
|
CCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||||
FCOMMON_OPT += -mfpu=neon
|
FCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||||
else
|
else
|
||||||
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
||||||
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
||||||
|
|
|
@ -47,7 +47,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}MAXKERNEL}" "" "max_k" false "" "" false ${float_type})
|
||||||
endif ()
|
endif ()
|
||||||
if (DEFINED ${float_char}MINKERNEL)
|
if (DEFINED ${float_char}MINKERNEL)
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "" "min_k" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}MINKERNEL}" "USE_MIN" "min_k" false "" "" false ${float_type})
|
||||||
endif ()
|
endif ()
|
||||||
GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false ${float_type})
|
||||||
GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${I${float_char}AMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false ${float_type})
|
||||||
|
@ -55,7 +55,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
||||||
GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${I${float_char}MAXKERNEL}" "" "i*max_k" false "" "" false ${float_type})
|
||||||
endif ()
|
endif ()
|
||||||
if (DEFINED I${float_char}MINKERNEL)
|
if (DEFINED I${float_char}MINKERNEL)
|
||||||
GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "" "i*min_k" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${I${float_char}MINKERNEL}" "USE_MIN" "i*min_k" false "" "" false ${float_type})
|
||||||
endif ()
|
endif ()
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}ASUMKERNEL}" "" "asum_k" false "" "" false ${float_type})
|
||||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type})
|
GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPYKERNEL}" "" "axpy_k" false "" "" false ${float_type})
|
||||||
|
|
|
@ -171,7 +171,7 @@ IXAMAXKERNEL = izamax.S
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef ISAMINKERNEL
|
ifndef ISAMINKERNEL
|
||||||
ISAMINKERNEL = iamax.S
|
ISAMINKERNEL = iamax_sse.S
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef IDAMINKERNEL
|
ifndef IDAMINKERNEL
|
||||||
|
@ -207,7 +207,7 @@ IQMAXKERNEL = iamax.S
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef ISMINKERNEL
|
ifndef ISMINKERNEL
|
||||||
ISMINKERNEL = iamax.S
|
ISMINKERNEL = iamax_sse.S
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef IDMINKERNEL
|
ifndef IDMINKERNEL
|
||||||
|
|
|
@ -36,10 +36,6 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
/* This kernel was found to give wrong results when used for ISMIN/ISAMIN
|
|
||||||
with increment != 1, although it appears to be correct for corresponding
|
|
||||||
MAX operations. See issue 2116 */
|
|
||||||
|
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
@ -59,6 +55,15 @@
|
||||||
#define MAXSS minss
|
#define MAXSS minss
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
.macro LOAD_AND_COMPARE_TO_MXX REG
|
||||||
|
movss 0 * SIZE(X), \REG
|
||||||
|
addq INCX, X
|
||||||
|
#ifdef USE_ABS
|
||||||
|
andps %xmm15, \REG
|
||||||
|
#endif
|
||||||
|
cmpeqss %xmm0, \REG
|
||||||
|
.endm
|
||||||
|
|
||||||
#include "l1param.h"
|
#include "l1param.h"
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
@ -830,61 +835,14 @@
|
||||||
ALIGN_4
|
ALIGN_4
|
||||||
|
|
||||||
.L93:
|
.L93:
|
||||||
movss 0 * SIZE(X), %xmm1
|
LOAD_AND_COMPARE_TO_MXX %xmm1
|
||||||
addq INCX, X
|
LOAD_AND_COMPARE_TO_MXX %xmm2
|
||||||
#ifdef USE_ABS
|
LOAD_AND_COMPARE_TO_MXX %xmm3
|
||||||
andps %xmm15, %xmm1
|
LOAD_AND_COMPARE_TO_MXX %xmm4
|
||||||
#endif
|
LOAD_AND_COMPARE_TO_MXX %xmm5
|
||||||
cmpeqss %xmm0, %xmm1
|
LOAD_AND_COMPARE_TO_MXX %xmm6
|
||||||
|
LOAD_AND_COMPARE_TO_MXX %xmm7
|
||||||
movss 0 * SIZE(X), %xmm2
|
LOAD_AND_COMPARE_TO_MXX %xmm8
|
||||||
addq INCX, X
|
|
||||||
#ifdef USE_ABS
|
|
||||||
andps %xmm15, %xmm2
|
|
||||||
#endif
|
|
||||||
cmpeqss %xmm0, %xmm2
|
|
||||||
|
|
||||||
movss 0 * SIZE(X), %xmm3
|
|
||||||
addq INCX, X
|
|
||||||
#ifdef USE_ABS
|
|
||||||
andps %xmm15, %xmm3
|
|
||||||
#endif
|
|
||||||
cmpeqss %xmm0, %xmm3
|
|
||||||
|
|
||||||
movss 0 * SIZE(X), %xmm4
|
|
||||||
addq INCX, X
|
|
||||||
#ifdef USE_ABS
|
|
||||||
andps %xmm15, %xmm4
|
|
||||||
#endif
|
|
||||||
cmpeqss %xmm0, %xmm4
|
|
||||||
|
|
||||||
movss 0 * SIZE(X), %xmm5
|
|
||||||
addq INCX, X
|
|
||||||
#ifdef USE_ABS
|
|
||||||
andps %xmm15, %xmm5
|
|
||||||
#endif
|
|
||||||
cmpeqps %xmm0, %xmm5
|
|
||||||
|
|
||||||
movss 0 * SIZE(X), %xmm6
|
|
||||||
addq INCX, X
|
|
||||||
#ifdef USE_ABS
|
|
||||||
andps %xmm15, %xmm6
|
|
||||||
#endif
|
|
||||||
cmpeqss %xmm0, %xmm6
|
|
||||||
|
|
||||||
movss 0 * SIZE(X), %xmm7
|
|
||||||
addq INCX, X
|
|
||||||
#ifdef USE_ABS
|
|
||||||
andps %xmm15, %xmm7
|
|
||||||
#endif
|
|
||||||
cmpeqss %xmm0, %xmm7
|
|
||||||
|
|
||||||
movss 0 * SIZE(X), %xmm8
|
|
||||||
addq INCX, X
|
|
||||||
#ifdef USE_ABS
|
|
||||||
andps %xmm15, %xmm8
|
|
||||||
#endif
|
|
||||||
cmpeqss %xmm0, %xmm8
|
|
||||||
|
|
||||||
orps %xmm2, %xmm1
|
orps %xmm2, %xmm1
|
||||||
orps %xmm4, %xmm3
|
orps %xmm4, %xmm3
|
||||||
|
|
|
@ -7,6 +7,7 @@ else ()
|
||||||
set(OpenBLAS_utest_src
|
set(OpenBLAS_utest_src
|
||||||
utest_main.c
|
utest_main.c
|
||||||
test_amax.c
|
test_amax.c
|
||||||
|
test_ismin.c
|
||||||
test_rotmg.c
|
test_rotmg.c
|
||||||
test_rot.c
|
test_rot.c
|
||||||
test_axpy.c
|
test_axpy.c
|
||||||
|
|
|
@ -11,7 +11,7 @@ UTESTBIN=openblas_utest
|
||||||
|
|
||||||
include $(TOPDIR)/Makefile.system
|
include $(TOPDIR)/Makefile.system
|
||||||
|
|
||||||
OBJS=utest_main.o test_amax.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o
|
OBJS=utest_main.o test_amax.o test_ismin.o test_rotmg.o test_axpy.o test_dotu.o test_dsdot.o test_swap.o test_rot.o
|
||||||
#test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o
|
#test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o
|
||||||
|
|
||||||
ifneq ($(NO_LAPACK), 1)
|
ifneq ($(NO_LAPACK), 1)
|
||||||
|
|
|
@ -0,0 +1,89 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2020, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written
|
||||||
|
permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
#include "openblas_utest.h"
|
||||||
|
|
||||||
|
#define ELEMENTS 50
|
||||||
|
#define INCREMENT 2
|
||||||
|
|
||||||
|
CTEST(ismin, positive_step_2){
|
||||||
|
blasint i;
|
||||||
|
blasint N = ELEMENTS, inc = INCREMENT;
|
||||||
|
float x[ELEMENTS * INCREMENT];
|
||||||
|
for (i = 0; i < N * inc; i ++) {
|
||||||
|
x[i] = i + 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
x[8 * inc] = 0;
|
||||||
|
blasint index = BLASFUNC(ismin)(&N, x, &inc);
|
||||||
|
ASSERT_EQUAL(9, index);
|
||||||
|
}
|
||||||
|
|
||||||
|
CTEST(ismin, negative_step_2){
|
||||||
|
blasint i;
|
||||||
|
blasint N = ELEMENTS, inc = INCREMENT;
|
||||||
|
float x[ELEMENTS * INCREMENT];
|
||||||
|
for (i = 0; i < N * inc; i ++) {
|
||||||
|
x[i] = - i - 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
x[8 * inc] = -123456.0f;
|
||||||
|
blasint index = BLASFUNC(ismin)(&N, x, &inc);
|
||||||
|
ASSERT_EQUAL(9, index);
|
||||||
|
}
|
||||||
|
|
||||||
|
CTEST(ismax, positive_step_2){
|
||||||
|
blasint i;
|
||||||
|
blasint N = ELEMENTS, inc = INCREMENT;
|
||||||
|
float x[ELEMENTS * INCREMENT];
|
||||||
|
for (i = 0; i < N * inc; i ++) {
|
||||||
|
x[i] = i + 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
x[8 * inc] = 123456.0f;
|
||||||
|
blasint index = BLASFUNC(ismax)(&N, x, &inc);
|
||||||
|
ASSERT_EQUAL(9, index);
|
||||||
|
}
|
||||||
|
|
||||||
|
CTEST(ismax, negative_step_2){
|
||||||
|
blasint i;
|
||||||
|
blasint N = ELEMENTS, inc = INCREMENT;
|
||||||
|
float x[ELEMENTS * INCREMENT];
|
||||||
|
for (i = 0; i < N * inc; i ++) {
|
||||||
|
x[i] = - i - 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
x[8 * inc] = 0;
|
||||||
|
blasint index = BLASFUNC(ismax)(&N, x, &inc);
|
||||||
|
ASSERT_EQUAL(9, index);
|
||||||
|
}
|
Loading…
Reference in New Issue