Merge remote-tracking branch 'upstream/develop' into dyn

This commit is contained in:
Isuru Fernando 2017-08-06 19:07:00 +05:30
commit 505b218829
32 changed files with 527 additions and 353 deletions

View File

@ -1,4 +1,119 @@
# XXX: Precise is already deprecated, new default is Trusty.
# https://blog.travis-ci.com/2017-07-11-trusty-as-default-linux-is-coming
dist: precise
sudo: false
language: c
compiler: gcc
jobs:
include:
- &test-ubuntu
stage: test
addons:
apt:
packages:
- gfortran
before_script: &common-before
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
script:
- set -e
- make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE
env:
- TARGET_BOX=LINUX64
- BTYPE="BINARY=64"
- <<: *test-ubuntu
env:
- TARGET_BOX=LINUX64
- BTYPE="BINARY=64 USE_OPENMP=1"
- <<: *test-ubuntu
env:
- TARGET_BOX=LINUX64
- BTYPE="BINARY=64 INTERFACE64=1"
- <<: *test-ubuntu
addons:
apt:
packages:
- gcc-multilib
- gfortran-multilib
env:
- TARGET_BOX=LINUX32
- BTYPE="BINARY=32"
- stage: test
addons:
apt:
packages:
- binutils-mingw-w64-x86-64
- gcc-mingw-w64-x86-64
- gfortran-mingw-w64-x86-64
before_script: *common-before
script:
- make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
env:
- TARGET_BOX=WIN64
- BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
# Build & test on Alpine Linux inside a chroot, i.e. on a system with musl libc.
# These jobs need sudo, so Travis runs them on VM-based infrastructure,
# which is slower than the container-based infrastructure used for jobs
# that don't require sudo.
- &test-alpine
stage: test
dist: trusty
sudo: true
language: minimal
before_install:
- "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.6.0/alpine-chroot-install' \
&& echo 'a827a4ba3d0817e7c88bae17fe34e50204983d1e alpine-chroot-install' | sha1sum -c || exit 1"
- alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
install:
- sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
before_script: *common-before
script:
- set -e
# XXX: Disable some warnings for now to avoid exceeding the Travis limit for log size.
- alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
- alpine make -C test $COMMON_FLAGS $BTYPE
- alpine make -C ctest $COMMON_FLAGS $BTYPE
- alpine make -C utest $COMMON_FLAGS $BTYPE
env:
- TARGET_BOX=LINUX64_MUSL
- BTYPE="BINARY=64"
# XXX: This job segfaults in TESTS OF THE COMPLEX LEVEL 3 BLAS,
# so it's "allowed to fail" for now (see allow_failures).
- &test-alpine-openmp
<<: *test-alpine
env:
- TARGET_BOX=LINUX64_MUSL
- BTYPE="BINARY=64 USE_OPENMP=1"
- <<: *test-alpine
env:
- TARGET_BOX=LINUX64_MUSL
- BTYPE="BINARY=64 INTERFACE64=1"
# Build with the same flags as Alpine uses in its OpenBLAS package.
- <<: *test-alpine
env:
- TARGET_BOX=LINUX64_MUSL
- BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=core2"
allow_failures:
- <<: *test-alpine-openmp
# whitelist
branches:
only:
- master
- develop
notifications:
webhooks:
@ -7,32 +122,3 @@ notifications:
on_success: change # options: [always|never|change] default: always
on_failure: always # options: [always|never|change] default: always
on_start: never # options: [always|never|change] default: always
compiler:
- gcc
env:
- TARGET_BOX=LINUX64 BTYPE="BINARY=64"
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 USE_OPENMP=1"
- TARGET_BOX=LINUX64 BTYPE="BINARY=64 INTERFACE64=1"
- TARGET_BOX=LINUX32 BTYPE="BINARY=32"
- TARGET_BOX=WIN64 BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq gfortran
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
script:
- set -e
- make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
# whitelist
branches:
only:
- master
- develop

View File

@ -12,31 +12,36 @@ clone_folder: c:\projects\OpenBLAS
init:
- git config --global core.autocrlf input
build:
project: OpenBLAS.sln
clone_depth: 5
#branches to build
branches:
only:
- master
- develop
- cmake
skip_tags: true
matrix:
fast_finish: true
fast_finish: false
skip_commits:
# Add [av skip] to commit messages
message: /\[av skip\]/
environment:
matrix:
- COMPILER: clang-cl
- COMPILER: cl
install:
- if [%COMPILER%]==[clang-cl] call C:\Miniconda36-x64\Scripts\activate.bat
- if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
- if [%COMPILER%]==[clang-cl] conda install --yes clangdev ninja cmake
- if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
before_build:
- echo Running cmake...
- cd c:\projects\OpenBLAS
- cmake -G "Visual Studio 12 Win64" .
- if [%COMPILER%]==[cl] cmake -G "Visual Studio 12 Win64" .
- if [%COMPILER%]==[clang-cl] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl .
build_script:
- cmake --build .
test_script:
- echo Running Test

View File

@ -28,6 +28,8 @@
set(FU "")
if(APPLE)
set(FU "_")
elseif(MSVC AND ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
set(FU "")
elseif(MSVC)
set(FU "_")
elseif(UNIX)
@ -59,7 +61,8 @@ endif ()
# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on.
#
# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check
set(ARCH ${CMAKE_SYSTEM_PROCESSOR})
set(ARCH ${CMAKE_SYSTEM_PROCESSOR} CACHE STRING "Target Architecture")
if (${ARCH} STREQUAL "AMD64")
set(ARCH "x86_64")
endif ()

View File

@ -51,7 +51,8 @@ else()
endif()
add_custom_command(
TARGET ${OpenBLAS_LIBNAME} PRE_LINK
OUTPUT ${PROJECT_BINARY_DIR}/openblas.def
#TARGET ${OpenBLAS_LIBNAME} PRE_LINK
COMMAND perl
ARGS "${PROJECT_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
COMMENT "Create openblas.def file"

View File

@ -66,13 +66,12 @@ set(GETARCH_SRC
${CPUIDEMO}
)
if (NOT MSVC)
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
endif ()
if (MSVC)
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC")
#Use generic for MSVC now
message("MSVC")
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC)
else()
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S)
endif ()
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore")

View File

@ -495,6 +495,33 @@ static void __inline blas_lock(volatile BLASULONG *address){
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
#endif
#ifndef ASSEMBLER
/* C99 supports complex floating-point numbers natively, which GCC also offers
as an extension since version 3.0. If neither is available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER)
#define OPENBLAS_COMPLEX_C99
#ifndef __cplusplus
#include <complex.h>
#endif
typedef float _Complex openblas_complex_float;
typedef double _Complex openblas_complex_double;
typedef xdouble _Complex openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
#else
#define OPENBLAS_COMPLEX_STRUCT
typedef struct { float real, imag; } openblas_complex_float;
typedef struct { double real, imag; } openblas_complex_double;
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) {(real), (imag)}
#define openblas_make_complex_double(real, imag) {(real), (imag)}
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
#endif
#endif
#include "param.h"
#include "common_param.h"
@ -524,31 +551,6 @@ static void __inline blas_lock(volatile BLASULONG *address){
#include <stdio.h>
#endif // NOINCLUDE
/* C99 supports complex floating numbers natively, which GCC also offers as an
extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT)))
#define OPENBLAS_COMPLEX_C99
#ifndef __cplusplus
#include <complex.h>
#endif
typedef float _Complex openblas_complex_float;
typedef double _Complex openblas_complex_double;
typedef xdouble _Complex openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I))
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I))
#else
#define OPENBLAS_COMPLEX_STRUCT
typedef struct { float real, imag; } openblas_complex_float;
typedef struct { double real, imag; } openblas_complex_double;
typedef struct { xdouble real, imag; } openblas_complex_xdouble;
#define openblas_make_complex_float(real, imag) {(real), (imag)}
#define openblas_make_complex_double(real, imag) {(real), (imag)}
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)}
#endif
#ifdef XDOUBLE
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i)

View File

@ -333,8 +333,8 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
float (*cnrm2_k) (BLASLONG, float *, BLASLONG);
float (*casum_k) (BLASLONG, float *, BLASLONG);
int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
float _Complex (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
float _Complex (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
@ -496,8 +496,8 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
double (*znrm2_k) (BLASLONG, double *, BLASLONG);
double (*zasum_k) (BLASLONG, double *, BLASLONG);
int (*zcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
double _Complex (*zdotu_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
double _Complex (*zdotc_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
openblas_complex_double (*zdotu_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
openblas_complex_double (*zdotc_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
int (*zdrot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
int (*zaxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
@ -661,8 +661,8 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
xdouble (*xnrm2_k) (BLASLONG, xdouble *, BLASLONG);
xdouble (*xasum_k) (BLASLONG, xdouble *, BLASLONG);
int (*xcopy_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
xdouble _Complex (*xdotu_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
xdouble _Complex (*xdotc_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
openblas_complex_xdouble (*xdotu_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
openblas_complex_xdouble (*xdotc_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
int (*xqrot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);
int (*xaxpy_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);

View File

@ -230,8 +230,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT
#ifndef TRANSA
range_m[num_cpu] = num_cpu * ((m + 15) & ~15);
if (range_m[num_cpu] > m) range_m[num_cpu] = m;
#else
range_m[num_cpu] = num_cpu * ((n + 15) & ~15);
if (range_m[num_cpu] > n) range_m[num_cpu] = n;
#endif
queue[num_cpu].mode = mode;

View File

@ -246,6 +246,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel;
@ -285,6 +286,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel;
@ -316,6 +318,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * ((n + 15) & ~15);
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel;

View File

@ -246,6 +246,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = spmv_kernel;
@ -285,6 +286,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = spmv_kernel;

View File

@ -177,6 +177,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
queue[MAX_CPU_NUMBER - num_cpu - 1].mode = mode;
queue[MAX_CPU_NUMBER - num_cpu - 1].routine = symv_kernel;
@ -225,6 +226,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = symv_kernel;

View File

@ -288,6 +288,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
@ -327,6 +328,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
@ -356,6 +358,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;

View File

@ -307,6 +307,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = tpmv_kernel;
@ -346,6 +347,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = tpmv_kernel;

View File

@ -346,6 +346,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
@ -385,6 +386,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;

View File

@ -155,7 +155,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef DYNAMIC_ARCH
gotoblas_t *gotoblas = NULL;
#endif
extern void openblas_warning(int verbose, const char * msg);
#ifndef SMP
@ -187,7 +186,7 @@ int i,n;
#if !defined(__GLIBC_PREREQ)
return nums;
#endif
#else
#if !__GLIBC_PREREQ(2, 3)
return nums;
#endif
@ -204,8 +203,7 @@ int i,n;
nums = CPU_COUNT(sizeof(cpu_set_t),cpusetp);
#endif
return nums;
#endif
#else
cpusetp = CPU_ALLOC(nums);
if (cpusetp == NULL) return nums;
size = CPU_ALLOC_SIZE(nums);
@ -214,6 +212,8 @@ int i,n;
nums = CPU_COUNT_S(size,cpusetp);
CPU_FREE(cpusetp);
return nums;
#endif
#endif
}
#endif
#endif

View File

@ -1,7 +1,6 @@
include_directories(${PROJECT_SOURCE_DIR})
# Makefile
function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
set (OPENBLAS_SRC "")
@ -21,7 +20,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
endif ()
if (${ARCH} STREQUAL "x86")
if (NOT MSVC)
if (NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
GenerateNamedObjects("${KERNELDIR}/cpuid.S" "" "" false "" "" true)
else()
GenerateNamedObjects("${KERNELDIR}/cpuid_win.c" "" "" false "" "" true)

View File

@ -147,57 +147,57 @@ static FLOAT casum_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x)
" fmov s6, "REG0" \n"
" fmov s7, "REG0" \n"
" cmp "N", xzr \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
" cmp "INC_X", xzr \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
" cmp "INC_X", #1 \n"
" bne .Lasum_kernel_S_BEGIN \n"
" bne 5f //asum_kernel_S_BEGIN \n"
".Lasum_kernel_F_BEGIN: \n"
"1: //asum_kernel_F_BEGIN: \n"
" asr "J", "N", #5 \n"
" cmp "J", xzr \n"
" beq .Lasum_kernel_F1 \n"
" beq 3f //asum_kernel_F1 \n"
".Lasum_kernel_F32: \n"
"2: //asum_kernel_F32: \n"
" "KERNEL_F32" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_F32 \n"
" bne 2b //asum_kernel_F32 \n"
" "KERNEL_F32_FINALIZE" \n"
".Lasum_kernel_F1: \n"
"3: //asum_kernel_F1: \n"
" ands "J", "N", #31 \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
".Lasum_kernel_F10: \n"
"4: //asum_kernel_F10: \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_F10 \n"
" b .Lasum_kernel_L999 \n"
" bne 4b //asum_kernel_F10 \n"
" b 9f //asum_kernel_L999 \n"
".Lasum_kernel_S_BEGIN: \n"
"5: //asum_kernel_S_BEGIN: \n"
" "INIT_S" \n"
" asr "J", "N", #2 \n"
" cmp "J", xzr \n"
" ble .Lasum_kernel_S1 \n"
" ble 7f //asum_kernel_S1 \n"
".Lasum_kernel_S4: \n"
"6: //asum_kernel_S4: \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_S4 \n"
" bne 6b //asum_kernel_S4 \n"
".Lasum_kernel_S1: \n"
"7: //asum_kernel_S1: \n"
" ands "J", "N", #3 \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
".Lasum_kernel_S10: \n"
"8: //asum_kernel_S10: \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_S10 \n"
" bne 8b //asum_kernel_S10 \n"
".Lasum_kernel_L999: \n"
"9: //asum_kernel_L999: \n"
" fmov %[ASUM_], "SUMFD" \n"
: [ASUM_] "=r" (asum) //%0

View File

@ -90,62 +90,62 @@ static int do_copy(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_
" mov "Y", %[Y_] \n"
" mov "INC_Y", %[INCY_] \n"
" cmp "N", xzr \n"
" ble .Lcopy_kernel_L999 \n"
" ble 8f //copy_kernel_L999 \n"
" cmp "INC_X", #1 \n"
" bne .Lcopy_kernel_S_BEGIN \n"
" bne 4f //copy_kernel_S_BEGIN \n"
" cmp "INC_Y", #1 \n"
" bne .Lcopy_kernel_S_BEGIN \n"
" bne 4f //copy_kernel_S_BEGIN \n"
".Lcopy_kernel_F_BEGIN: \n"
"// .Lcopy_kernel_F_BEGIN: \n"
" "INIT" \n"
" asr "J", "N", #"N_DIV_SHIFT" \n"
" cmp "J", xzr \n"
" beq .Lcopy_kernel_F1 \n"
" beq 2f //copy_kernel_F1 \n"
" .align 5 \n"
".Lcopy_kernel_F: \n"
"1: //copy_kernel_F: \n"
" "KERNEL_F" \n"
" subs "J", "J", #1 \n"
" bne .Lcopy_kernel_F \n"
" bne 1b //copy_kernel_F \n"
".Lcopy_kernel_F1: \n"
"2: //copy_kernel_F1: \n"
#if defined(COMPLEX) && defined(DOUBLE)
" b .Lcopy_kernel_L999 \n"
" b 8f //copy_kernel_L999 \n"
#else
" ands "J", "N", #"N_REM_MASK" \n"
" ble .Lcopy_kernel_L999 \n"
" ble 8f //copy_kernel_L999 \n"
#endif
".Lcopy_kernel_F10: \n"
"3: //copy_kernel_F10: \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Lcopy_kernel_F10 \n"
" b .Lcopy_kernel_L999 \n"
" bne 3b //copy_kernel_F10 \n"
" b 8f //copy_kernel_L999 \n"
".Lcopy_kernel_S_BEGIN: \n"
"4: //copy_kernel_S_BEGIN: \n"
" "INIT" \n"
" asr "J", "N", #2 \n"
" cmp "J", xzr \n"
" ble .Lcopy_kernel_S1 \n"
" ble 6f //copy_kernel_S1 \n"
".Lcopy_kernel_S4: \n"
"5: //copy_kernel_S4: \n"
" "KERNEL_F1" \n"
" "KERNEL_F1" \n"
" "KERNEL_F1" \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Lcopy_kernel_S4 \n"
" bne 5b //copy_kernel_S4 \n"
".Lcopy_kernel_S1: \n"
"6: //copy_kernel_S1: \n"
" ands "J", "N", #3 \n"
" ble .Lcopy_kernel_L999 \n"
" ble 8f //copy_kernel_L999 \n"
".Lcopy_kernel_S10: \n"
"7: //copy_kernel_S10: \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Lcopy_kernel_S10 \n"
" bne 7b //copy_kernel_S10 \n"
".Lcopy_kernel_L999: \n"
"8: //copy_kernel_L999: \n"
:
: [N_] "r" (n), //%1

View File

@ -141,58 +141,58 @@ static FLOAT dasum_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x)
" fmov d6, "REG0" \n"
" fmov d7, "REG0" \n"
" cmp "N", xzr \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
" cmp "INC_X", xzr \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
" cmp "INC_X", #1 \n"
" bne .Lasum_kernel_S_BEGIN \n"
" bne 5f //asum_kernel_S_BEGIN \n"
".Lasum_kernel_F_BEGIN: \n"
"1: //asum_kernel_F_BEGIN: \n"
" asr "J", "N", #5 \n"
" cmp "J", xzr \n"
" beq .Lasum_kernel_F1 \n"
" beq 3f //asum_kernel_F1 \n"
".align 5 \n"
".Lasum_kernel_F32: \n"
"2: //asum_kernel_F32: \n"
" "KERNEL_F32" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_F32 \n"
" bne 2b //asum_kernel_F32 \n"
" "KERNEL_F32_FINALIZE" \n"
".Lasum_kernel_F1: \n"
"3: //asum_kernel_F1: \n"
" ands "J", "N", #31 \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
".Lasum_kernel_F10: \n"
"4: //asum_kernel_F10: \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_F10 \n"
" b .Lasum_kernel_L999 \n"
" bne 4b //asum_kernel_F10 \n"
" b 9f //asum_kernel_L999 \n"
".Lasum_kernel_S_BEGIN: \n"
"5: //asum_kernel_S_BEGIN: \n"
" "INIT_S" \n"
" asr "J", "N", #2 \n"
" cmp "J", xzr \n"
" ble .Lasum_kernel_S1 \n"
" ble 7f //asum_kernel_S1 \n"
".Lasum_kernel_S4: \n"
"6: //asum_kernel_S4: \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_S4 \n"
" bne 6b //asum_kernel_S4 \n"
".Lasum_kernel_S1: \n"
"7: //asum_kernel_S1: \n"
" ands "J", "N", #3 \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
".Lasum_kernel_S10: \n"
"8: //asum_kernel_S10: \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_S10 \n"
" bne 8b //asum_kernel_S10 \n"
".Lasum_kernel_L999: \n"
"9: //asum_kernel_L999: \n"
" fmov %[ASUM_], "SUMF" \n"
: [ASUM_] "=r" (asum) //%0

View File

@ -291,61 +291,61 @@ static RETURN_TYPE dot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, B
" fmov d6, xzr \n"
" fmov d7, xzr \n"
" cmp "N", xzr \n"
" ble .Ldot_kernel_L999 \n"
" ble 9f //dot_kernel_L999 \n"
" cmp "INC_X", #1 \n"
" bne .Ldot_kernel_S_BEGIN \n"
" bne 5f //dot_kernel_S_BEGIN \n"
" cmp "INC_Y", #1 \n"
" bne .Ldot_kernel_S_BEGIN \n"
" bne 5f //dot_kernel_S_BEGIN \n"
".Ldot_kernel_F_BEGIN: \n"
"1: //dot_kernel_F_BEGIN: \n"
" lsl "INC_X", "INC_X", "INC_SHIFT" \n"
" lsl "INC_Y", "INC_Y", "INC_SHIFT" \n"
" asr "J", "N", #"N_DIV_SHIFT" \n"
" cmp "J", xzr \n"
" beq .Ldot_kernel_F1 \n"
" beq 3f //dot_kernel_F1 \n"
" .align 5 \n"
".Ldot_kernel_F: \n"
"2: //dot_kernel_F: \n"
" "KERNEL_F" \n"
" subs "J", "J", #1 \n"
" bne .Ldot_kernel_F \n"
" bne 2b //dot_kernel_F \n"
" "KERNEL_F_FINALIZE" \n"
".Ldot_kernel_F1: \n"
"3: //dot_kernel_F1: \n"
" ands "J", "N", #"N_REM_MASK" \n"
" ble .Ldot_kernel_L999 \n"
" ble 9f //dot_kernel_L999 \n"
".Ldot_kernel_F10: \n"
"4: //dot_kernel_F10: \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Ldot_kernel_F10 \n"
" b .Ldot_kernel_L999 \n"
" bne 4b //dot_kernel_F10 \n"
" b 9f //dot_kernel_L999 \n"
".Ldot_kernel_S_BEGIN: \n"
"5: //dot_kernel_S_BEGIN: \n"
" lsl "INC_X", "INC_X", "INC_SHIFT" \n"
" lsl "INC_Y", "INC_Y", "INC_SHIFT" \n"
" asr "J", "N", #2 \n"
" cmp "J", xzr \n"
" ble .Ldot_kernel_S1 \n"
" ble 7f //dot_kernel_S1 \n"
".Ldot_kernel_S4: \n"
"6: //dot_kernel_S4: \n"
" "KERNEL_F1" \n"
" "KERNEL_F1" \n"
" "KERNEL_F1" \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Ldot_kernel_S4 \n"
" bne 6b //dot_kernel_S4 \n"
".Ldot_kernel_S1: \n"
"7: //dot_kernel_S1: \n"
" ands "J", "N", #3 \n"
" ble .Ldot_kernel_L999 \n"
" ble 9f //dot_kernel_L999 \n"
".Ldot_kernel_S10: \n"
"8: //dot_kernel_S10: \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Ldot_kernel_S10 \n"
" bne 8b //dot_kernel_S10 \n"
".Ldot_kernel_L999: \n"
"9: //dot_kernel_L999: \n"
" str "DOTF", [%[DOT_]] \n"
:

View File

@ -74,33 +74,33 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
" fmov "SCALE", xzr \n"
" fmov "SSQ", #1.0 \n"
" cmp "N", xzr \n"
" ble .Lnrm2_kernel_L999 \n"
" ble 9f //nrm2_kernel_L999 \n"
" cmp "INC_X", xzr \n"
" ble .Lnrm2_kernel_L999 \n"
" ble 9f //nrm2_kernel_L999 \n"
".Lnrm2_kernel_F_BEGIN: \n"
"1: //nrm2_kernel_F_BEGIN: \n"
" fmov "REGZERO", xzr \n"
" fmov "REGONE", #1.0 \n"
" lsl "INC_X", "INC_X", #"INC_SHIFT" \n"
" mov "J", "N" \n"
" cmp "J", xzr \n"
" beq .Lnrm2_kernel_L999 \n"
" beq 9f //nrm2_kernel_L999 \n"
".Lnrm2_kernel_F_ZERO_SKIP: \n"
"2: //nrm2_kernel_F_ZERO_SKIP: \n"
" ldr d4, ["X"] \n"
" fcmp d4, "REGZERO" \n"
" bne .Lnrm2_kernel_F_INIT \n"
" bne 3f //nrm2_kernel_F_INIT \n"
#if defined(COMPLEX)
" ldr d4, ["X", #8] \n"
" fcmp d4, "REGZERO" \n"
" bne .Lnrm2_kernel_F_INIT_I \n"
" bne 4f //nrm2_kernel_F_INIT_I \n"
#endif
" add "X", "X", "INC_X" \n"
" subs "J", "J", #1 \n"
" beq .Lnrm2_kernel_L999 \n"
" b .Lnrm2_kernel_F_ZERO_SKIP \n"
" beq 9f //nrm2_kernel_L999 \n"
" b 2b //nrm2_kernel_F_ZERO_SKIP \n"
".Lnrm2_kernel_F_INIT: \n"
"3: //nrm2_kernel_F_INIT: \n"
" ldr d4, ["X"] \n"
" fabs d4, d4 \n"
" fmax "CUR_MAX", "SCALE", d4 \n"
@ -112,7 +112,7 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
" fadd "SSQ", "SSQ", d4 \n"
" fmov "SCALE", "CUR_MAX" \n"
#if defined(COMPLEX)
".Lnrm2_kernel_F_INIT_I: \n"
"4: //nrm2_kernel_F_INIT_I: \n"
" ldr d3, ["X", #8] \n"
" fabs d3, d3 \n"
" fmax "CUR_MAX", "SCALE", d3 \n"
@ -126,16 +126,16 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
#endif
" add "X", "X", "INC_X" \n"
" subs "J", "J", #1 \n"
" beq .Lnrm2_kernel_L999 \n"
" beq 9f //nrm2_kernel_L999 \n"
".Lnrm2_kernel_F_START: \n"
"5: //nrm2_kernel_F_START: \n"
" cmp "INC_X", #"SZ" \n"
" bne .Lnrm2_kernel_F1 \n"
" bne 8f //nrm2_kernel_F1 \n"
" asr "K", "J", #4 \n"
" cmp "K", xzr \n"
" beq .Lnrm2_kernel_F1 \n"
" beq 8f //nrm2_kernel_F1 \n"
".Lnrm2_kernel_F: \n"
"6: //nrm2_kernel_F: \n"
" ldp q16, q17, ["X"] \n"
" ldp q18, q19, ["X", #32] \n"
" ldp q20, q21, ["X", #64] \n"
@ -255,13 +255,13 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
" fmov "SCALE", "CUR_MAX" \n"
#endif
" subs "K", "K", #1 \n"
" bne .Lnrm2_kernel_F \n"
" bne 6b //nrm2_kernel_F \n"
".Lnrm2_kernel_F_DONE: \n"
"7: //nrm2_kernel_F_DONE: \n"
" ands "J", "J", #15 \n"
" beq .Lnrm2_kernel_L999 \n"
" beq 9f //nrm2_kernel_L999 \n"
".Lnrm2_kernel_F1: \n"
"8: //nrm2_kernel_F1: \n"
" ldr d4, ["X"] \n"
" fabs d4, d4 \n"
" fmax "CUR_MAX", "SCALE", d4 \n"
@ -286,9 +286,9 @@ static void nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x,
#endif
" add "X", "X", "INC_X" \n"
" subs "J", "J", #1 \n"
" bne .Lnrm2_kernel_F1 \n"
" bne 8b //nrm2_kernel_F1 \n"
".Lnrm2_kernel_L999: \n"
"9: //nrm2_kernel_L999: \n"
" str "SSQ", [%[SSQ_]] \n"
" str "SCALE", [%[SCALE_]] \n"

View File

@ -208,7 +208,7 @@ extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n
#endif
static BLASLONG iamax_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x)
static BLASLONG __attribute__((noinline)) iamax_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG index = 0;
@ -220,72 +220,72 @@ static BLASLONG iamax_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x)
" mov "INC_X", %[INCX_] \n"
" cmp "N", xzr \n"
" ble .Liamax_kernel_zero \n"
" ble 10f //iamax_kernel_zero \n"
" cmp "INC_X", xzr \n"
" ble .Liamax_kernel_zero \n"
" ble 10f //iamax_kernel_zero \n"
" cmp "INC_X", #1 \n"
" bne .Liamax_kernel_S_BEGIN \n"
" bne 5f //iamax_kernel_S_BEGIN \n"
" mov x7, "X" \n"
".Liamax_kernel_F_BEGIN: \n"
"1: //iamax_kernel_F_BEGIN: \n"
" "INIT" \n"
" subs "N", "N", #1 \n"
" ble .Liamax_kernel_L999 \n"
" ble 9f //iamax_kernel_L999 \n"
" asr "J", "N", #"N_DIV_SHIFT" \n"
" cmp "J", xzr \n"
" beq .Liamax_kernel_F1 \n"
" beq 3f //iamax_kernel_F1 \n"
" add "Z", "Z", #1 \n"
".Liamax_kernel_F: \n"
"2: //iamax_kernel_F: \n"
" "KERNEL_F" \n"
" subs "J", "J", #1 \n"
" bne .Liamax_kernel_F \n"
" bne 2b //iamax_kernel_F \n"
" "KERNEL_F_FINALIZE" \n"
" sub "Z", "Z", #1 \n"
".Liamax_kernel_F1: \n"
"3: //iamax_kernel_F1: \n"
" ands "J", "N", #"N_REM_MASK" \n"
" ble .Liamax_kernel_L999 \n"
" ble 9f //iamax_kernel_L999 \n"
".Liamax_kernel_F10: \n"
"4: //iamax_kernel_F10: \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Liamax_kernel_F10 \n"
" b .Liamax_kernel_L999 \n"
" bne 4b //iamax_kernel_F10 \n"
" b 9f //iamax_kernel_L999 \n"
".Liamax_kernel_S_BEGIN: \n"
"5: //iamax_kernel_S_BEGIN: \n"
" "INIT" \n"
" subs "N", "N", #1 \n"
" ble .Liamax_kernel_L999 \n"
" ble 9f //iamax_kernel_L999 \n"
" asr "J", "N", #2 \n"
" cmp "J", xzr \n"
" ble .Liamax_kernel_S1 \n"
" ble 7f //iamax_kernel_S1 \n"
".Liamax_kernel_S4: \n"
"6: //iamax_kernel_S4: \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Liamax_kernel_S4 \n"
" bne 6b //iamax_kernel_S4 \n"
".Liamax_kernel_S1: \n"
"7: //iamax_kernel_S1: \n"
" ands "J", "N", #3 \n"
" ble .Liamax_kernel_L999 \n"
" ble 9f //iamax_kernel_L999 \n"
".Liamax_kernel_S10: \n"
"8: //iamax_kernel_S10: \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Liamax_kernel_S10 \n"
" bne 8b //iamax_kernel_S10 \n"
".Liamax_kernel_L999: \n"
"9: //iamax_kernel_L999: \n"
" mov x0, "INDEX" \n"
" b .Liamax_kernel_DONE \n"
" b 11f //iamax_kernel_DONE \n"
".Liamax_kernel_zero: \n"
"10: //iamax_kernel_zero: \n"
" mov x0, xzr \n"
".Liamax_kernel_DONE: \n"
"11: //iamax_kernel_DONE: \n"
" mov %[INDEX_], "INDEX" \n"
: [INDEX_] "=r" (index) //%0

View File

@ -229,72 +229,72 @@ static BLASLONG izamax_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x)
" mov "INC_X", %[INCX_] \n"
" cmp "N", xzr \n"
" ble .Lizamax_kernel_zero \n"
" ble 10f //izamax_kernel_zero \n"
" cmp "INC_X", xzr \n"
" ble .Lizamax_kernel_zero \n"
" ble 10f //izamax_kernel_zero \n"
" cmp "INC_X", #1 \n"
" bne .Lizamax_kernel_S_BEGIN \n"
" bne 5f //izamax_kernel_S_BEGIN \n"
" mov x7, "X" \n"
".Lizamax_kernel_F_BEGIN: \n"
"1: //izamax_kernel_F_BEGIN: \n"
" "INIT" \n"
" subs "N", "N", #1 \n"
" ble .Lizamax_kernel_L999 \n"
" ble 9f //izamax_kernel_L999 \n"
" asr "J", "N", #"N_DIV_SHIFT" \n"
" cmp "J", xzr \n"
" beq .Lizamax_kernel_F1 \n"
" beq 3f //izamax_kernel_F1 \n"
" add "Z", "Z", #1 \n"
".Lizamax_kernel_F: \n"
"2: //izamax_kernel_F: \n"
" "KERNEL_F" \n"
" subs "J", "J", #1 \n"
" bne .Lizamax_kernel_F \n"
" bne 2b //izamax_kernel_F \n"
" "KERNEL_F_FINALIZE" \n"
" sub "Z", "Z", #1 \n"
".Lizamax_kernel_F1: \n"
"3: //izamax_kernel_F1: \n"
" ands "J", "N", #"N_REM_MASK" \n"
" ble .Lizamax_kernel_L999 \n"
" ble 9f //izamax_kernel_L999 \n"
".Lizamax_kernel_F10: \n"
"4: //izamax_kernel_F10: \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Lizamax_kernel_F10 \n"
" b .Lizamax_kernel_L999 \n"
" bne 4b //izamax_kernel_F10 \n"
" b 9f //izamax_kernel_L999 \n"
".Lizamax_kernel_S_BEGIN: \n"
"5: //izamax_kernel_S_BEGIN: \n"
" "INIT" \n"
" subs "N", "N", #1 \n"
" ble .Lizamax_kernel_L999 \n"
" ble 9f //izamax_kernel_L999 \n"
" asr "J", "N", #2 \n"
" cmp "J", xzr \n"
" ble .Lizamax_kernel_S1 \n"
" ble 7f //izamax_kernel_S1 \n"
".Lizamax_kernel_S4: \n"
"6: //izamax_kernel_S4: \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Lizamax_kernel_S4 \n"
" bne 6b //izamax_kernel_S4 \n"
".Lizamax_kernel_S1: \n"
"7: //izamax_kernel_S1: \n"
" ands "J", "N", #3 \n"
" ble .Lizamax_kernel_L999 \n"
" ble 9f //izamax_kernel_L999 \n"
".Lizamax_kernel_S10: \n"
"8: //izamax_kernel_S10: \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Lizamax_kernel_S10 \n"
" bne 8b //izamax_kernel_S10 \n"
".Lizamax_kernel_L999: \n"
"9: //izamax_kernel_L999: \n"
" mov x0, "INDEX" \n"
" b .Lizamax_kernel_DONE \n"
" b 11f //izamax_kernel_DONE \n"
".Lizamax_kernel_zero: \n"
"10: //izamax_kernel_zero: \n"
" mov x0, xzr \n"
".Lizamax_kernel_DONE: \n"
"11: //izamax_kernel_DONE: \n"
" mov %[INDEX_], "INDEX" \n"
: [INDEX_] "=r" (index) //%0

View File

@ -143,58 +143,58 @@ static FLOAT sasum_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x)
" fmov s6, "REG0" \n"
" fmov s7, "REG0" \n"
" cmp "N", xzr \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
" cmp "INC_X", xzr \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
" cmp "INC_X", #1 \n"
" bne .Lasum_kernel_S_BEGIN \n"
" bne 5f //asum_kernel_S_BEGIN \n"
".Lasum_kernel_F_BEGIN: \n"
"1: //asum_kernel_F_BEGIN: \n"
" asr "J", "N", #6 \n"
" cmp "J", xzr \n"
" beq .Lasum_kernel_F1 \n"
" beq 3f //asum_kernel_F1 \n"
".align 5 \n"
".Lasum_kernel_F64: \n"
"2: //asum_kernel_F64: \n"
" "KERNEL_F64" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_F64 \n"
" bne 2b //asum_kernel_F64 \n"
" "KERNEL_F64_FINALIZE" \n"
".Lasum_kernel_F1: \n"
"3: //asum_kernel_F1: \n"
" ands "J", "N", #63 \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
".Lasum_kernel_F10: \n"
"4: //asum_kernel_F10: \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_F10 \n"
" b .Lasum_kernel_L999 \n"
" bne 4b //asum_kernel_F10 \n"
" b 9f //asum_kernel_L999 \n"
".Lasum_kernel_S_BEGIN: \n"
"5: //asum_kernel_S_BEGIN: \n"
" "INIT_S" \n"
" asr "J", "N", #2 \n"
" cmp "J", xzr \n"
" ble .Lasum_kernel_S1 \n"
" ble 7f //asum_kernel_S1 \n"
".Lasum_kernel_S4: \n"
"6: //asum_kernel_S4: \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_S4 \n"
" bne 6b //asum_kernel_S4 \n"
".Lasum_kernel_S1: \n"
"7: //asum_kernel_S1: \n"
" ands "J", "N", #3 \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
".Lasum_kernel_S10: \n"
"8: //asum_kernel_S10: \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_S10 \n"
" bne 8b //asum_kernel_S10 \n"
".Lasum_kernel_L999: \n"
"9: //asum_kernel_L999: \n"
" fmov %[ASUM_], "SUMFD" \n"
: [ASUM_] "=r" (asum) //%0

View File

@ -227,58 +227,58 @@ static double nrm2_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x)
" fmov d6, xzr \n"
" fmov d7, xzr \n"
" cmp "N", xzr \n"
" ble .Lnrm2_kernel_L999 \n"
" ble 9f //nrm2_kernel_L999 \n"
" cmp "INC_X", xzr \n"
" ble .Lnrm2_kernel_L999 \n"
" ble 9f //nrm2_kernel_L999 \n"
" cmp "INC_X", #1 \n"
" bne .Lnrm2_kernel_S_BEGIN \n"
" bne 5f //nrm2_kernel_S_BEGIN \n"
".Lnrm2_kernel_F_BEGIN: \n"
"1: //nrm2_kernel_F_BEGIN: \n"
" asr "J", "N", #"N_DIV_SHIFT" \n"
" cmp "J", xzr \n"
" beq .Lnrm2_kernel_S_BEGIN \n"
" beq 5f //nrm2_kernel_S_BEGIN \n"
" .align 5 \n"
".Lnrm2_kernel_F: \n"
"2: //nrm2_kernel_F: \n"
" "KERNEL_F" \n"
" subs "J", "J", #1 \n"
" bne .Lnrm2_kernel_F \n"
" bne 2b //nrm2_kernel_F \n"
" "KERNEL_F_FINALIZE" \n"
".Lnrm2_kernel_F1: \n"
"3: //nrm2_kernel_F1: \n"
" ands "J", "N", #"N_REM_MASK" \n"
" ble .Lnrm2_kernel_L999 \n"
" ble 9f //nrm2_kernel_L999 \n"
".Lnrm2_kernel_F10: \n"
"4: //nrm2_kernel_F10: \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Lnrm2_kernel_F10 \n"
" b .Lnrm2_kernel_L999 \n"
" bne 4b //nrm2_kernel_F10 \n"
" b 9f //nrm2_kernel_L999 \n"
".Lnrm2_kernel_S_BEGIN: \n"
"5: //nrm2_kernel_S_BEGIN: \n"
" lsl "INC_X", "INC_X", #"INC_SHIFT" \n"
" asr "J", "N", #2 \n"
" cmp "J", xzr \n"
" ble .Lnrm2_kernel_S1 \n"
" ble 7f //nrm2_kernel_S1 \n"
".Lnrm2_kernel_S4: \n"
"6: //nrm2_kernel_S4: \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Lnrm2_kernel_S4 \n"
" bne 6b //nrm2_kernel_S4 \n"
".Lnrm2_kernel_S1: \n"
"7: //nrm2_kernel_S1: \n"
" ands "J", "N", #3 \n"
" ble .Lnrm2_kernel_L999 \n"
" ble 9f //nrm2_kernel_L999 \n"
".Lnrm2_kernel_S10: \n"
"8: //nrm2_kernel_S10: \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Lnrm2_kernel_S10 \n"
" bne 8b //nrm2_kernel_S10 \n"
".Lnrm2_kernel_L999: \n"
"9: //nrm2_kernel_L999: \n"
" "KERNEL_FINALIZE" \n"
" fmov %[RET_], "SSQD" \n"

View File

@ -143,58 +143,58 @@ static FLOAT zasum_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x)
" fmov d6, "REG0" \n"
" fmov d7, "REG0" \n"
" cmp "N", xzr \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
" cmp "INC_X", xzr \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
" cmp "INC_X", #1 \n"
" bne .Lasum_kernel_S_BEGIN \n"
" bne 5f //asum_kernel_S_BEGIN \n"
".Lasum_kernel_F_BEGIN: \n"
"1: //asum_kernel_F_BEGIN: \n"
" asr "J", "N", #4 \n"
" cmp "J", xzr \n"
" beq .Lasum_kernel_F1 \n"
" beq 3f //asum_kernel_F1 \n"
".align 5 \n"
".Lasum_kernel_F16: \n"
"2: //asum_kernel_F16: \n"
" "KERNEL_F16" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_F16 \n"
" bne 2b //asum_kernel_F16 \n"
" "KERNEL_F16_FINALIZE" \n"
".Lasum_kernel_F1: \n"
"3: //asum_kernel_F1: \n"
" ands "J", "N", #15 \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
".Lasum_kernel_F10: \n"
"4: //asum_kernel_F10: \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_F10 \n"
" b .Lasum_kernel_L999 \n"
" bne 4b //asum_kernel_F10 \n"
" b 9f //asum_kernel_L999 \n"
".Lasum_kernel_S_BEGIN: \n"
"5: //asum_kernel_S_BEGIN: \n"
" "INIT_S" \n"
" asr "J", "N", #2 \n"
" cmp "J", xzr \n"
" ble .Lasum_kernel_S1 \n"
" ble 7f //asum_kernel_S1 \n"
".Lasum_kernel_S4: \n"
"6: //asum_kernel_S4: \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_S4 \n"
" bne 6b //asum_kernel_S4 \n"
".Lasum_kernel_S1: \n"
"7: //asum_kernel_S1: \n"
" ands "J", "N", #3 \n"
" ble .Lasum_kernel_L999 \n"
" ble 9f //asum_kernel_L999 \n"
".Lasum_kernel_S10: \n"
"8: //asum_kernel_S10: \n"
" "KERNEL_S1" \n"
" subs "J", "J", #1 \n"
" bne .Lasum_kernel_S10 \n"
" bne 8b //asum_kernel_S10 \n"
".Lasum_kernel_L999: \n"
"9: //asum_kernel_L999: \n"
" fmov %[ASUM_], "SUMF" \n"
: [ASUM_] "=r" (asum) //%0

View File

@ -218,61 +218,61 @@ static void zdot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLON
" fmov d6, xzr \n"
" fmov d7, xzr \n"
" cmp "N", xzr \n"
" ble .Ldot_kernel_L999 \n"
" ble 9f //dot_kernel_L999 \n"
" cmp "INC_X", #1 \n"
" bne .Ldot_kernel_S_BEGIN \n"
" bne 5f //dot_kernel_S_BEGIN \n"
" cmp "INC_Y", #1 \n"
" bne .Ldot_kernel_S_BEGIN \n"
" bne 5f //dot_kernel_S_BEGIN \n"
".Ldot_kernel_F_BEGIN: \n"
"1: //dot_kernel_F_BEGIN: \n"
" lsl "INC_X", "INC_X", "INC_SHIFT" \n"
" lsl "INC_Y", "INC_Y", "INC_SHIFT" \n"
" asr "J", "N", #"N_DIV_SHIFT" \n"
" cmp "J", xzr \n"
" beq .Ldot_kernel_F1 \n"
" beq 3f //dot_kernel_F1 \n"
" .align 5 \n"
".Ldot_kernel_F: \n"
"2: //dot_kernel_F: \n"
" "KERNEL_F" \n"
" subs "J", "J", #1 \n"
" bne .Ldot_kernel_F \n"
" bne 2b //dot_kernel_F \n"
" "KERNEL_F_FINALIZE" \n"
".Ldot_kernel_F1: \n"
"3: //dot_kernel_F1: \n"
" ands "J", "N", #"N_REM_MASK" \n"
" ble .Ldot_kernel_L999 \n"
" ble 9f //dot_kernel_L999 \n"
".Ldot_kernel_F10: \n"
"4: //dot_kernel_F10: \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Ldot_kernel_F10 \n"
" b .Ldot_kernel_L999 \n"
" bne 4b //dot_kernel_F10 \n"
" b 9f //dot_kernel_L999 \n"
".Ldot_kernel_S_BEGIN: \n"
"5: //dot_kernel_S_BEGIN: \n"
" lsl "INC_X", "INC_X", "INC_SHIFT" \n"
" lsl "INC_Y", "INC_Y", "INC_SHIFT" \n"
" asr "J", "N", #2 \n"
" cmp "J", xzr \n"
" ble .Ldot_kernel_S1 \n"
" ble 7f //dot_kernel_S1 \n"
".Ldot_kernel_S4: \n"
"6: //dot_kernel_S4: \n"
" "KERNEL_F1" \n"
" "KERNEL_F1" \n"
" "KERNEL_F1" \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Ldot_kernel_S4 \n"
" bne 6b //dot_kernel_S4 \n"
".Ldot_kernel_S1: \n"
"7: //dot_kernel_S1: \n"
" ands "J", "N", #3 \n"
" ble .Ldot_kernel_L999 \n"
" ble 9f //dot_kernel_L999 \n"
".Ldot_kernel_S10: \n"
"8: //dot_kernel_S10: \n"
" "KERNEL_F1" \n"
" subs "J", "J", #1 \n"
" bne .Ldot_kernel_S10 \n"
" bne 8b //dot_kernel_S10 \n"
".Ldot_kernel_L999: \n"
"9: //dot_kernel_L999: \n"
" str "DOTF", [%[DOTR_]] \n"
" str "DOTI", [%[DOTI_]] \n"

View File

@ -91,16 +91,15 @@ static void cdot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d)
#endif
FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
BLASLONG i;
BLASLONG ix,iy;
FLOAT _Complex result;
FLOAT dot[8] = { 0.0, 0.0, 0.0 , 0.0, 0.0, 0.0, 0.0, 0.0 } ;
if ( n <= 0 )
{
result = OPENBLAS_MAKE_COMPLEX_FLOAT (0.0, 0.0) ;
OPENBLAS_COMPLEX_FLOAT result = OPENBLAS_MAKE_COMPLEX_FLOAT (0.0, 0.0) ;
return(result);
}
@ -160,11 +159,11 @@ FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG in
}
#if !defined(CONJ)
result = OPENBLAS_MAKE_COMPLEX_FLOAT (dot[0]-dot[1], dot[4]+dot[5]) ;
OPENBLAS_COMPLEX_FLOAT result = OPENBLAS_MAKE_COMPLEX_FLOAT (dot[0]-dot[1], dot[4]+dot[5]) ;
// CREAL(result) = dot[0] - dot[1];
// CIMAG(result) = dot[4] + dot[5];
#else
result = OPENBLAS_MAKE_COMPLEX_FLOAT (dot[0]+dot[1], dot[4]-dot[5]) ;
OPENBLAS_COMPLEX_FLOAT result = OPENBLAS_MAKE_COMPLEX_FLOAT (dot[0]+dot[1], dot[4]-dot[5]) ;
// CREAL(result) = dot[0] + dot[1];
// CIMAG(result) = dot[4] - dot[5];

View File

@ -86,18 +86,17 @@ static void zdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d)
#endif
FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
BLASLONG i;
BLASLONG ix,iy;
FLOAT _Complex result;
FLOAT dot[4] = { 0.0, 0.0, 0.0 , 0.0 } ;
if ( n <= 0 )
{
// CREAL(result) = 0.0 ;
// CIMAG(result) = 0.0 ;
result=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0,0.0);
OPENBLAS_COMPLEX_FLOAT result=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0,0.0);
return(result);
}
@ -151,11 +150,11 @@ FLOAT _Complex CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG in
}
#if !defined(CONJ)
result=OPENBLAS_MAKE_COMPLEX_FLOAT(dot[0]-dot[1],dot[2]+dot[3]);
OPENBLAS_COMPLEX_FLOAT result=OPENBLAS_MAKE_COMPLEX_FLOAT(dot[0]-dot[1],dot[2]+dot[3]);
// CREAL(result) = dot[0] - dot[1];
// CIMAG(result) = dot[2] + dot[3];
#else
result=OPENBLAS_MAKE_COMPLEX_FLOAT(dot[0]+dot[1],dot[2]-dot[3]);
OPENBLAS_COMPLEX_FLOAT result=OPENBLAS_MAKE_COMPLEX_FLOAT(dot[0]+dot[1],dot[2]-dot[3]);
// CREAL(result) = dot[0] + dot[1];
// CIMAG(result) = dot[2] - dot[3];

View File

@ -59,7 +59,7 @@ typedef int blasint;
extension since version 3.0. If neither are available, use a compatible
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT)))
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER)
#define OPENBLAS_COMPLEX_C99
#ifndef __cplusplus
#include <complex.h>

View File

@ -1,10 +1,14 @@
include_directories(${PROJECT_SOURCE_DIR})
include_directories(${PROJECT_BINARY_DIR})
if (MSVC AND "${CMAKE_C_COMPILER_ID}" MATCHES Clang)
set(OpenBLAS_utest_src utest_main2.c)
else ()
set(OpenBLAS_utest_src
utest_main.c
test_amax.c
)
endif ()
if (NOT NO_LAPACK)
set(OpenBLAS_utest_src
@ -36,7 +40,7 @@ endforeach()
if (MSVC)
add_custom_command(TARGET ${OpenBLAS_utest_bin}
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_BINARY_DIR}/lib/$<CONFIG>/${OpenBLAS_LIBNAME}.dll ${CMAKE_CURRENT_BINARY_DIR}/.
COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_BINARY_DIR}/lib/${CMAKE_CFG_INTDIR}/${OpenBLAS_LIBNAME}.dll ${CMAKE_CURRENT_BINARY_DIR}/.
)
endif()

61
utest/utest_main2.c Normal file
View File

@ -0,0 +1,61 @@
/*****************************************************************************
Copyright (c) 2011-2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#include <stdio.h>
#define CTEST_MAIN
#define CTEST_SEGFAULT
#define CTEST_ADD_TESTS_MANUALLY
#include "openblas_utest.h"
/*
 * Smoke test for the single-precision amax routine.
 *
 * Calls BLASFUNC(samax) on a three-element, unit-stride vector and checks
 * that the result is within SINGLE_EPS of 3.3, the largest magnitude among
 * the inputs {-1.1, 2.2, -3.3}.
 */
CTEST(amax, samax){
    float x[]={-1.1, 2.2, -3.3};
    blasint N=3, inc=1;

    /* Reference value first, then the value reported by the library. */
    float expected=3.3;
    float computed=BLASFUNC(samax)(&N, x, &inc);

    ASSERT_DBL_NEAR_TOL((double)(expected), (double)(computed), SINGLE_EPS);
}
/*
 * Test-runner entry point.
 *
 * This file defines CTEST_ADD_TESTS_MANUALLY, so the amax/samax test is
 * registered explicitly with CTEST_ADD before the runner starts. The value
 * returned by ctest_main() becomes the process exit status (presumably the
 * number of failed tests -- confirm against ctest.h).
 */
int main(int argc, const char ** argv){
    CTEST_ADD(amax, samax);
    return ctest_main(argc, argv);
}