commit
ed473267df
24
.drone.yml
24
.drone.yml
|
@ -190,3 +190,27 @@ steps:
|
||||||
- make -C ctest $COMMON_FLAGS
|
- make -C ctest $COMMON_FLAGS
|
||||||
- make -C utest $COMMON_FLAGS
|
- make -C utest $COMMON_FLAGS
|
||||||
- make -C cpp_thread_test dgemm_tester
|
- make -C cpp_thread_test dgemm_tester
|
||||||
|
---
|
||||||
|
kind: pipeline
|
||||||
|
name: arm64_gcc10
|
||||||
|
|
||||||
|
platform:
|
||||||
|
os: linux
|
||||||
|
arch: arm64
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Build and Test
|
||||||
|
image: ubuntu:20.04
|
||||||
|
environment:
|
||||||
|
CC: gcc-10
|
||||||
|
FC: gfortran-10
|
||||||
|
COMMON_FLAGS: 'TARGET=ARMV8 DYNAMIC_ARCH=1'
|
||||||
|
commands:
|
||||||
|
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||||
|
- apt-get update -y
|
||||||
|
- apt-get install -y make $CC gfortran-10 perl python g++
|
||||||
|
- $CC --version
|
||||||
|
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||||
|
- make -C utest $COMMON_FLAGS
|
||||||
|
- make -C test $COMMON_FLAGS
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,11 @@ jobs:
|
||||||
if: github.event_name != 'pull_request'
|
if: github.event_name != 'pull_request'
|
||||||
run: brew update || true
|
run: brew update || true
|
||||||
|
|
||||||
|
- name: unlink installed gcc to allow updating
|
||||||
|
run: |
|
||||||
|
brew unlink gcc@8
|
||||||
|
brew unlink gcc@9
|
||||||
|
|
||||||
- name: Install prerequisites
|
- name: Install prerequisites
|
||||||
run: brew install --fetch-HEAD --HEAD --only-dependencies --keep-tmp openblas
|
run: brew install --fetch-HEAD --HEAD --only-dependencies --keep-tmp openblas
|
||||||
|
|
||||||
|
|
|
@ -89,5 +89,7 @@ build.*
|
||||||
*.swp
|
*.swp
|
||||||
benchmark/*.goto
|
benchmark/*.goto
|
||||||
benchmark/smallscaling
|
benchmark/smallscaling
|
||||||
|
.vscode
|
||||||
CMakeCache.txt
|
CMakeCache.txt
|
||||||
CMakeFiles/*
|
CMakeFiles/*
|
||||||
|
.vscode
|
||||||
|
|
45
.travis.yml
45
.travis.yml
|
@ -211,44 +211,57 @@ matrix:
|
||||||
|
|
||||||
- &test-macos
|
- &test-macos
|
||||||
os: osx
|
os: osx
|
||||||
osx_image: xcode10.1
|
osx_image: xcode11.5
|
||||||
before_script:
|
before_script:
|
||||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||||
- brew update
|
|
||||||
- brew install gcc@8 # for gfortran
|
|
||||||
script:
|
script:
|
||||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||||
env:
|
env:
|
||||||
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-8"
|
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-9"
|
||||||
|
|
||||||
|
- <<: *test-macos
|
||||||
|
osx_image: xcode12
|
||||||
|
before_script:
|
||||||
|
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||||
|
- brew update
|
||||||
|
script:
|
||||||
|
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||||
|
env:
|
||||||
|
- BTYPE="TARGET=HASWELL USE_OPENMP=1 BINARY=64 INTERFACE64=1 CC=gcc-10 FC=gfortran-10"
|
||||||
|
|
||||||
- <<: *test-macos
|
- <<: *test-macos
|
||||||
osx_image: xcode12
|
osx_image: xcode12
|
||||||
before_script:
|
before_script:
|
||||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||||
- brew update
|
- brew update
|
||||||
- brew install gcc@10 # for gfortran
|
|
||||||
script:
|
script:
|
||||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||||
env:
|
env:
|
||||||
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"
|
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"
|
||||||
|
|
||||||
- <<: *test-macos
|
# - <<: *test-macos
|
||||||
osx_image: xcode10.0
|
# osx_image: xcode10
|
||||||
env:
|
# env:
|
||||||
- BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1"
|
# - BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1"
|
||||||
|
|
||||||
- <<: *test-macos
|
- <<: *test-macos
|
||||||
osx_image: xcode10.1
|
osx_image: xcode11.5
|
||||||
|
before_script:
|
||||||
|
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||||
|
- brew update
|
||||||
env:
|
env:
|
||||||
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
# - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||||
- CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
|
# - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
|
||||||
|
- CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||||
|
- CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch arm64 -miphoneos-version-min=10.0"
|
||||||
- BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
|
- BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
|
||||||
|
|
||||||
- <<: *test-macos
|
- <<: *test-macos
|
||||||
osx_image: xcode10.1
|
osx_image: xcode11.5
|
||||||
env:
|
env:
|
||||||
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
# - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||||
- CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
|
# - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
|
||||||
|
- CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||||
|
- CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch armv7 -miphoneos-version-min=5.1"
|
||||||
- BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
|
- BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
|
||||||
|
|
||||||
- &test-graviton2
|
- &test-graviton2
|
||||||
|
|
|
@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
|
||||||
project(OpenBLAS C ASM)
|
project(OpenBLAS C ASM)
|
||||||
set(OpenBLAS_MAJOR_VERSION 0)
|
set(OpenBLAS_MAJOR_VERSION 0)
|
||||||
set(OpenBLAS_MINOR_VERSION 3)
|
set(OpenBLAS_MINOR_VERSION 3)
|
||||||
set(OpenBLAS_PATCH_VERSION 10.dev)
|
set(OpenBLAS_PATCH_VERSION 14.dev)
|
||||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||||
|
|
||||||
# Adhere to GNU filesystem layout conventions
|
# Adhere to GNU filesystem layout conventions
|
||||||
|
@ -14,6 +14,9 @@ include(GNUInstallDirs)
|
||||||
|
|
||||||
include(CMakePackageConfigHelpers)
|
include(CMakePackageConfigHelpers)
|
||||||
|
|
||||||
|
if(MSVC AND NOT DEFINED NOFORTRAN)
|
||||||
|
set(NOFORTRAN ON)
|
||||||
|
endif()
|
||||||
|
|
||||||
#######
|
#######
|
||||||
if(MSVC)
|
if(MSVC)
|
||||||
|
@ -229,7 +232,7 @@ if (NOT NO_CBLAS)
|
||||||
add_subdirectory(utest)
|
add_subdirectory(utest)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT MSVC AND NOT NOFORTRAN)
|
if (NOT NOFORTRAN)
|
||||||
# Build test and ctest
|
# Build test and ctest
|
||||||
add_subdirectory(test)
|
add_subdirectory(test)
|
||||||
if(NOT NO_CBLAS)
|
if(NOT NO_CBLAS)
|
||||||
|
|
|
@ -191,3 +191,6 @@ In chronological order:
|
||||||
|
|
||||||
* Danfeng Zhang <https://github.com/craft-zhang>
|
* Danfeng Zhang <https://github.com/craft-zhang>
|
||||||
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
|
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
|
||||||
|
|
||||||
|
* PingTouGe Semiconductor Co., Ltd.
|
||||||
|
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910
|
||||||
|
|
196
Changelog.txt
196
Changelog.txt
|
@ -1,4 +1,200 @@
|
||||||
OpenBLAS ChangeLog
|
OpenBLAS ChangeLog
|
||||||
|
====================================================================
|
||||||
|
Version 0.3.14
|
||||||
|
17-Mar-2021
|
||||||
|
|
||||||
|
common:
|
||||||
|
* Fixed a race condition on thread shutdown in non-OpenMP builds
|
||||||
|
* Fixed custom BUFFERSIZE option getting ignored in gmake builds
|
||||||
|
* Fixed CMAKE compilation of the TRMM kernels for GENERIC platforms
|
||||||
|
* Added CBLAS interfaces for CROTG, ZROTG, CSROT and ZDROT
|
||||||
|
* Improved performance of OMATCOPY_RT across all platforms
|
||||||
|
* Changed perl scripts to use env instead of a hardcoded /usr/bin/perl
|
||||||
|
* Fixed potential misreading of the GCC compiler version in the build scripts
|
||||||
|
* Fixed convergence problems in LAPACK complex GGEV/GGES (Reference-LAPACK #477)
|
||||||
|
* Reduced the stacksize requirements for running the LAPACK testsuite (Reference-LAPACK #335)
|
||||||
|
|
||||||
|
RISCV:
|
||||||
|
* Fixed compilation on RISCV (missing entry in getarch)
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Fixed compilation for DYNAMIC_ARCH with clang and with old gcc versions
|
||||||
|
* Added support for compilation on FreeBSD/ppc64le
|
||||||
|
* Added optimized POWER10 kernels for SSCAL, DSCAL, CSCAL, ZSCAL
|
||||||
|
* Added optimized POWER10 kernels for SROT, DROT, CDOT, SASUM, DASUM
|
||||||
|
* Improved SSWAP, DSWAP, CSWAP, ZSWAP performance on POWER10
|
||||||
|
* Improved SCOPY and CCOPY performance on POWER10
|
||||||
|
* Improved SGEMM and DGEMM performance on POWER10
|
||||||
|
* Added support for compilation with the NVIDIA HPC compiler
|
||||||
|
|
||||||
|
x86_64:
|
||||||
|
* Added an optimized bfloat16 GEMM kernel for Cooperlake
|
||||||
|
* Added CPUID autodetection for Intel Rocket Lake and Tiger Lake cpus
|
||||||
|
* Improved the performance of SASUM,DASUM,SROT,DROT on AMD Ryzen cpus
|
||||||
|
* Added support for compilation with the NAG Fortran compiler
|
||||||
|
* Fixed recognition of the AMD AOCC compiler
|
||||||
|
* Fixed compilation for DYNAMIC_ARCH with clang on Windows
|
||||||
|
* Added support for running the BLAS/CBLAS tests on Windows
|
||||||
|
* Fixed signatures of the tls callback functions for Windows x64
|
||||||
|
* Fixed various issues with fma intrinsics support handling
|
||||||
|
|
||||||
|
ARM:
|
||||||
|
* Added support for embedded Cortex M targets via a new option EMBEDDED
|
||||||
|
|
||||||
|
ARMV8:
|
||||||
|
* Fixed the THUNDERX2T99 and NEOVERSEN1 DNRM2/ZNRM2 kernels for inputs with Inf
|
||||||
|
* Added support for the DYNAMIC_LIST option
|
||||||
|
* Added support for compilation with the NVIDIA HPC compiler
|
||||||
|
* Added support for compiling with the NAG Fortran compiler
|
||||||
|
|
||||||
|
====================================================================
|
||||||
|
Version 0.3.13
|
||||||
|
12-Dec-2020
|
||||||
|
|
||||||
|
common:
|
||||||
|
* Added a generic bfloat16 SBGEMV kernel
|
||||||
|
* Fixed a potentially severe memory leak after fork in OpenMP builds
|
||||||
|
that was introduced in 0.3.12
|
||||||
|
* Added detection of the Fujitsu Fortran compiler
|
||||||
|
* Added detection of the (e)gfortran compiler on OpenBSD
|
||||||
|
* Added support for overriding the default name of the library independently
|
||||||
|
from symbol suffixing in the gmake builds (already supported in cmake)
|
||||||
|
|
||||||
|
RISCV:
|
||||||
|
* Added a RISC V port optimized for C910V
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Added optimized POWER10 kernels for SAXPY, CAXPY, SDOT, DDOT and DGEMV_N
|
||||||
|
* Improved DGEMM performance on POWER10
|
||||||
|
* Improved STRSM and DTRSM performance on POWER9 and POWER10
|
||||||
|
* Fixed segmentation faults in DYNAMIC_ARCH builds
|
||||||
|
* Fixed compilation with the PGI compiler
|
||||||
|
|
||||||
|
x86:
|
||||||
|
* Fixed compilation of kernels that require SSE2 intrinsics since 0.3.12
|
||||||
|
|
||||||
|
x86_64:
|
||||||
|
* Added an optimized bfloat16 SBGEMV kernel for SkylakeX and Cooperlake
|
||||||
|
* Improved the performance of SASUM and DASUM kernels through parallelization
|
||||||
|
* Improved the performance of SROT and DROT kernels
|
||||||
|
* Improved the performance of multithreaded xSYRK
|
||||||
|
* Fixed OpenMP builds that use the LLVM Clang compiler together with GNU gfortran
|
||||||
|
(where linking of both the LLVM libomp and GNU libgomp could lead to lockups or
|
||||||
|
wrong results)
|
||||||
|
* Fixed miscompilations by old gcc 4.6
|
||||||
|
* Fixed misdetection of AVX2 capability in some Sandybridge cpus
|
||||||
|
* Fixed lockups in builds combining DYNAMIC_ARCH with TARGET=GENERIC on OpenBSD
|
||||||
|
|
||||||
|
ARM64:
|
||||||
|
* Fixed segmentation faults in DYNAMIC_ARCH builds
|
||||||
|
|
||||||
|
MIPS:
|
||||||
|
* Improved kernels for Loongson 3R3 ("3A") and 3R4 ("3B") models, including MSA
|
||||||
|
* Fixed bugs in the MSA kernels for CGEMM, CTRMM, CGEMV and ZGEMV
|
||||||
|
* Added handling of zero increments in the MSA kernels for SSWAP and DSWAP
|
||||||
|
* Added DYNAMIC_ARCH support for MIPS64 (currently Loongson3R3/3R4 only)
|
||||||
|
|
||||||
|
SPARC:
|
||||||
|
* Fixed building 32 and 64 bit SPARC kernels with the SolarisStudio compilers
|
||||||
|
|
||||||
|
====================================================================
|
||||||
|
Version 0.3.12
|
||||||
|
24-Oct-2020
|
||||||
|
|
||||||
|
common:
|
||||||
|
* Fixed missing BLAS/LAPACK functions (inadvertently dropped during
|
||||||
|
the build system restructuring)
|
||||||
|
* Fixed argument conversion macro in LAPACKE_zgesvdq (LAPACK #458)
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Added optimized SCOPY/CCOPY kernels for POWER10
|
||||||
|
* Increased and unified the default size of the GEMM BUFFER
|
||||||
|
* Fixed building for POWER10 in DYNAMIC_ARCH mode
|
||||||
|
* POWER10 compatibility test now checks binutils version as well
|
||||||
|
* Cleaned up compiler warnings
|
||||||
|
|
||||||
|
x86_64:
|
||||||
|
* Corrected compiler version checks for AVX2 compatibility
|
||||||
|
* Added compiler option -mavx2 for building with flang
|
||||||
|
* Fixed direct SGEMM pathway for small matrix sizes (broken by
|
||||||
|
the code refactoring in 0.3.11)
|
||||||
|
* Fixed unhandled partial register clobbers in several kernels
|
||||||
|
for AXPY,DOT,GEMV_N and GEMV_T flagged by gcc10 tree-vectorizer
|
||||||
|
|
||||||
|
ARMV8:
|
||||||
|
* Improved Apple Vortex support to include cross-compiling
|
||||||
|
|
||||||
|
====================================================================
|
||||||
|
Version 0.3.11
|
||||||
|
17-Oct-2020
|
||||||
|
|
||||||
|
common:
|
||||||
|
* API change:
|
||||||
|
the newly added BFLOAT16 functions were renamed to use the
|
||||||
|
letter "B" instead of "H" to avoid potential confusion with
|
||||||
|
the IEEE "half precision float" type, i.e. the 0.3.10
|
||||||
|
SHGEMM is now SBGEMM and the corresponding build option
|
||||||
|
was changed from "BUILD_HALF" to "BUILD_BFLOAT16".
|
||||||
|
* Reduced the default BLAS3_MEM_ALLOC_THRESHOLD (used as an upper
|
||||||
|
limit for placing temporary arrays on the stack) to be compatible
|
||||||
|
with a stack size of 1 MB (as imposed by the Java runtime library)
|
||||||
|
* Added mixed-precision dot function SBDOT and utility functions
|
||||||
|
sbstobf16, sbdtobf16, sbf16tos and dbf16tod to convert between
|
||||||
|
single or double precision float arrays and bfloat16 arrays
|
||||||
|
* Fixed prototypes of LAPACK_?ggsvp and LAPACK_?ggsvd functions
|
||||||
|
in lapack.h
|
||||||
|
* Fixed underflow and rounding errors in LAPACK SLANV2 and DLANV2
|
||||||
|
(causing miscalculations in e.g. SHSEQR/DHSEQR, LAPACK issue #263)
|
||||||
|
* Fixed workspace calculation in LAPACK ?GELQ (LAPACK issue #415)
|
||||||
|
* Fixed several bugs in the LAPACK testsuite
|
||||||
|
* Improved performance of TRMM and TRSM for certain problem sizes
|
||||||
|
* Fixed infinite recursions and workspace miscalculations in ReLAPACK
|
||||||
|
* CMAKE builds no longer require pkg-config for creating the .pc file
|
||||||
|
* Makefile builds no longer misread NO_CBLAS=0 or NO_LAPACK=0 as
|
||||||
|
enabling these options
|
||||||
|
* Fixed detection of gfortran when invoked through an mpi wrapper
|
||||||
|
* Improved thread reinitialization performance with OpenMP after a fork
|
||||||
|
* Added support for building only the subset of the library required
|
||||||
|
for a particular precision by specifying BUILD_SINGLE, BUILD_DOUBLE
|
||||||
|
* Optional function name prefixes and suffixes are now correctly
|
||||||
|
reflected in the generated cblas.h
|
||||||
|
* Added CMAKE build support for the LAPACK and multithreading tests
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Added optimized support for POWER10
|
||||||
|
* Added support for compiling for POWER8 in 32bit mode
|
||||||
|
* Added support for compilation with LLVM/clang
|
||||||
|
* Added support for compilation with NVIDIA/PGI compilers
|
||||||
|
* Fixed building on big-endian POWER8
|
||||||
|
* Fixed miscompilation of ZDOTC by gcc10
|
||||||
|
* Fixed alignment errors in the POWER8 SAXPY kernel
|
||||||
|
* Improved CPU detection on AIX
|
||||||
|
* Supported building with older compilers on POWER9
|
||||||
|
|
||||||
|
x86_64:
|
||||||
|
* Added support for Intel Cooperlake
|
||||||
|
* Added autodetection of AMD Renoir/Matisse/Zen3 cpus
|
||||||
|
* Added autodetection of Intel Comet Lake cpus
|
||||||
|
* Reimplemented ?sum, ?dot and daxpy using universal intrinsics
|
||||||
|
* Reset the fpu state before using the fpu on Windows as a workaround
|
||||||
|
for a problem introduced in Windows 10 build 19041 (a.k.a. SDK 2004)
|
||||||
|
* Fixed potentially undefined behaviour in the dot and gemv_t kernels
|
||||||
|
* Fixed a potential segmentation fault in DYNAMIC_ARCH builds
|
||||||
|
* Fixed building for ZEN with PGI/NVIDIA and AMD AOCC compilers
|
||||||
|
|
||||||
|
ARMV7:
|
||||||
|
* Fixed cpu detection on BSD-like systems
|
||||||
|
|
||||||
|
ARMV8:
|
||||||
|
* Added preliminary support for Apple Vortex cpus
|
||||||
|
* Added support for the Cavium ThunderX3T110 cpu
|
||||||
|
* Fixed cpu detection on BSD-like systems
|
||||||
|
* Fixed compilation in -std=C18 mode
|
||||||
|
|
||||||
|
IBM Z:
|
||||||
|
* Added support for compiling with the clang compiler
|
||||||
|
* Improved GEMM performance on Z14
|
||||||
|
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.3.10
|
Version 0.3.10
|
||||||
14-Jun-2020
|
14-Jun-2020
|
||||||
|
|
10
Makefile
10
Makefile
|
@ -59,6 +59,9 @@ endif
|
||||||
@$(CC) --version > /dev/null 2>&1;\
|
@$(CC) --version > /dev/null 2>&1;\
|
||||||
if [ $$? -eq 0 ]; then \
|
if [ $$? -eq 0 ]; then \
|
||||||
cverinfo=`$(CC) --version | sed -n '1p'`; \
|
cverinfo=`$(CC) --version | sed -n '1p'`; \
|
||||||
|
if [ -z "$${cverinfo}" ]; then \
|
||||||
|
cverinfo=`$(CC) --version | sed -n '2p'`; \
|
||||||
|
fi; \
|
||||||
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
|
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
|
||||||
else \
|
else \
|
||||||
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
|
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
|
||||||
|
@ -67,6 +70,9 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||||
@$(FC) --version > /dev/null 2>&1;\
|
@$(FC) --version > /dev/null 2>&1;\
|
||||||
if [ $$? -eq 0 ]; then \
|
if [ $$? -eq 0 ]; then \
|
||||||
fverinfo=`$(FC) --version | sed -n '1p'`; \
|
fverinfo=`$(FC) --version | sed -n '1p'`; \
|
||||||
|
if [ -z "$${fverinfo}" ]; then \
|
||||||
|
fverinfo=`$(FC) --version | sed -n '2p'`; \
|
||||||
|
fi; \
|
||||||
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
|
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
|
||||||
else \
|
else \
|
||||||
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\
|
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\
|
||||||
|
@ -268,7 +274,11 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGGFORTRAN1)
|
||||||
|
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
else
|
||||||
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
endif
|
||||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
|
|
@ -12,3 +12,8 @@ ifeq ($(CORE), ARMV6)
|
||||||
CCOMMON_OPT += -mfpu=vfp
|
CCOMMON_OPT += -mfpu=vfp
|
||||||
FCOMMON_OPT += -mfpu=vfp
|
FCOMMON_OPT += -mfpu=vfp
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifdef HAVE_NEON
|
||||||
|
CCOMMON_OPT += -mfpu=neon
|
||||||
|
FCOMMON_OPT += -mfpu=neon
|
||||||
|
endif
|
||||||
|
|
|
@ -1,28 +1,38 @@
|
||||||
|
ifneq ($(C_COMPILER), PGI)
|
||||||
ifeq ($(CORE), ARMV8)
|
ifeq ($(CORE), ARMV8)
|
||||||
CCOMMON_OPT += -march=armv8-a
|
CCOMMON_OPT += -march=armv8-a
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8-a
|
FCOMMON_OPT += -march=armv8-a
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), CORTEXA53)
|
ifeq ($(CORE), CORTEXA53)
|
||||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), CORTEXA57)
|
ifeq ($(CORE), CORTEXA57)
|
||||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
|
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
|
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), CORTEXA72)
|
ifeq ($(CORE), CORTEXA72)
|
||||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), CORTEXA73)
|
ifeq ($(CORE), CORTEXA73)
|
||||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
# Use a72 tunings because Neoverse-N1 is only available
|
# Use a72 tunings because Neoverse-N1 is only available
|
||||||
# in GCC>=9
|
# in GCC>=9
|
||||||
|
@ -30,51 +40,71 @@ ifeq ($(CORE), NEOVERSEN1)
|
||||||
ifeq ($(GCCVERSIONGTEQ7), 1)
|
ifeq ($(GCCVERSIONGTEQ7), 1)
|
||||||
ifeq ($(GCCVERSIONGTEQ9), 1)
|
ifeq ($(GCCVERSIONGTEQ9), 1)
|
||||||
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
|
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
|
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), THUNDERX)
|
ifeq ($(CORE), THUNDERX)
|
||||||
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), FALKOR)
|
ifeq ($(CORE), FALKOR)
|
||||||
CCOMMON_OPT += -march=armv8-a -mtune=falkor
|
CCOMMON_OPT += -march=armv8-a -mtune=falkor
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8-a -mtune=falkor
|
FCOMMON_OPT += -march=armv8-a -mtune=falkor
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), THUNDERX2T99)
|
ifeq ($(CORE), THUNDERX2T99)
|
||||||
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), THUNDERX3T110)
|
ifeq ($(CORE), THUNDERX3T110)
|
||||||
ifeq ($(GCCVERSIONGTEQ10), 1)
|
ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||||
CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
|
CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
|
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), VORTEX)
|
ifeq ($(CORE), VORTEX)
|
||||||
CCOMMON_OPT += -march=armv8.3-a
|
CCOMMON_OPT += -march=armv8.3-a
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8.3-a
|
FCOMMON_OPT += -march=armv8.3-a
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(GCCVERSIONGTEQ9), 1)
|
ifeq ($(GCCVERSIONGTEQ9), 1)
|
||||||
ifeq ($(CORE), TSV110)
|
ifeq ($(CORE), TSV110)
|
||||||
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
|
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
|
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
|
@ -9,7 +9,7 @@ OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
|
||||||
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
|
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
|
||||||
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
|
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
|
||||||
OPENBLAS_BUILD_DIR := $(CURDIR)
|
OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
|
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/$(LIBSONAMEBASE)
|
||||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
||||||
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
|
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
|
||||||
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
|
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
|
||||||
|
@ -150,13 +150,13 @@ endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
#Generating openblas.pc
|
#Generating openblas.pc
|
||||||
@echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
@echo Generating $(LIBSONAMEBASE).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
||||||
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
|
||||||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
|
||||||
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
|
||||||
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
|
||||||
@echo 'extralib='$(PKG_EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
@echo 'extralib='$(PKG_EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
|
||||||
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
|
||||||
|
|
||||||
|
|
||||||
#Generating OpenBLASConfig.cmake
|
#Generating OpenBLASConfig.cmake
|
||||||
|
|
|
@ -10,9 +10,11 @@ USE_OPENMP = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), POWER10)
|
ifeq ($(CORE), POWER10)
|
||||||
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
|
ifneq ($(C_COMPILER), PGI)
|
||||||
|
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
|
||||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), POWER9)
|
ifeq ($(CORE), POWER9)
|
||||||
ifneq ($(C_COMPILER), PGI)
|
ifneq ($(C_COMPILER), PGI)
|
||||||
|
|
|
@ -41,6 +41,10 @@ ifeq ($(TARGET), I6500)
|
||||||
TARGET_FLAGS = -mips64r6
|
TARGET_FLAGS = -mips64r6
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TARGET), C910V)
|
||||||
|
TARGET_FLAGS = -march=rv64gcvxthead -mabi=lp64v
|
||||||
|
endif
|
||||||
|
|
||||||
all: getarch_2nd
|
all: getarch_2nd
|
||||||
./getarch_2nd 0 >> $(TARGET_MAKE)
|
./getarch_2nd 0 >> $(TARGET_MAKE)
|
||||||
./getarch_2nd 1 >> $(TARGET_CONF)
|
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
ifeq ($(CORE), C910V)
|
||||||
|
CCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v
|
||||||
|
FCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v -static
|
||||||
|
endif
|
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
# This library's version
|
# This library's version
|
||||||
VERSION = 0.3.10.dev
|
VERSION = 0.3.14.dev
|
||||||
|
|
||||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||||
|
@ -295,10 +295,13 @@ COMMON_PROF = -pg
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# the below is not yet configurable, use cmake if you need to build only select types
|
# By default the library contains BLAS functions (and LAPACK if selected) for all input types.
|
||||||
BUILD_SINGLE = 1
|
# To build a smaller library supporting e.g. only single precision real (SGEMM etc.) or only
|
||||||
BUILD_DOUBLE = 1
|
# the functions for complex numbers, uncomment the desired type(s) below
|
||||||
BUILD_COMPLEX = 1
|
# BUILD_SINGLE = 1
|
||||||
BUILD_COMPLEX16 = 1
|
# BUILD_DOUBLE = 1
|
||||||
|
# BUILD_COMPLEX = 1
|
||||||
|
# BUILD_COMPLEX16 = 1
|
||||||
|
#
|
||||||
# End of user configuration
|
# End of user configuration
|
||||||
#
|
#
|
||||||
|
|
|
@ -3,21 +3,29 @@ RANLIB = ranlib
|
||||||
|
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
|
|
||||||
|
ifeq ($(C_COMPILER), GCC)
|
||||||
CCOMMON_OPT += -mcpu=v9 -m64
|
CCOMMON_OPT += -mcpu=v9 -m64
|
||||||
|
else
|
||||||
|
CCOMMON_OPT += -m64
|
||||||
|
endif
|
||||||
ifeq ($(COMPILER_F77), g77)
|
ifeq ($(COMPILER_F77), g77)
|
||||||
FCOMMON_OPT += -mcpu=v9 -m64
|
FCOMMON_OPT += -mcpu=v9 -m64
|
||||||
endif
|
endif
|
||||||
ifeq ($(COMPILER_F77), f90)
|
ifeq ($(COMPILER_F77), f95)
|
||||||
FCOMMON_OPT += -xarch=v9
|
FCOMMON_OPT += -m64
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
|
|
||||||
|
ifeq ($(C_COMPILER), GCC)
|
||||||
CCOMMON_OPT += -mcpu=v9
|
CCOMMON_OPT += -mcpu=v9
|
||||||
|
else
|
||||||
|
CCOMMON_OPT += -xarch=v9
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(COMPILER_F77), g77)
|
ifeq ($(COMPILER_F77), g77)
|
||||||
FCOMMON_OPT += -mcpu=v9
|
FCOMMON_OPT += -mcpu=v9
|
||||||
endif
|
endif
|
||||||
ifeq ($(COMPILER_F77), f90)
|
ifeq ($(COMPILER_F77), f95)
|
||||||
FCOMMON_OPT += -xarch=v8plusb
|
FCOMMON_OPT += -xarch=v8plusb
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
151
Makefile.system
151
Makefile.system
|
@ -21,6 +21,8 @@ ifeq ($(ARCH), amd64)
|
||||||
override ARCH=x86_64
|
override ARCH=x86_64
|
||||||
else ifeq ($(ARCH), powerpc64)
|
else ifeq ($(ARCH), powerpc64)
|
||||||
override ARCH=power
|
override ARCH=power
|
||||||
|
else ifeq ($(ARCH), powerpc64le)
|
||||||
|
override ARCH=power
|
||||||
else ifeq ($(ARCH), powerpc)
|
else ifeq ($(ARCH), powerpc)
|
||||||
override ARCH=power
|
override ARCH=power
|
||||||
else ifeq ($(ARCH), i386)
|
else ifeq ($(ARCH), i386)
|
||||||
|
@ -93,6 +95,12 @@ endif
|
||||||
ifdef TARGET
|
ifdef TARGET
|
||||||
GETARCH_FLAGS := -DFORCE_$(TARGET)
|
GETARCH_FLAGS := -DFORCE_$(TARGET)
|
||||||
GETARCH_FLAGS += -DUSER_TARGET
|
GETARCH_FLAGS += -DUSER_TARGET
|
||||||
|
ifeq ($(TARGET), GENERIC)
|
||||||
|
ifeq ($(DYNAMIC_ARCH), 1)
|
||||||
|
override NO_EXPRECISION=1
|
||||||
|
export NO_EXPRECiSION
|
||||||
|
endif
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# Force fallbacks for 32bit
|
# Force fallbacks for 32bit
|
||||||
|
@ -175,7 +183,7 @@ endif
|
||||||
|
|
||||||
# On x86_64 build getarch with march=native unless the compiler is PGI. This is required to detect AVX512 support in getarch.
|
# On x86_64 build getarch with march=native unless the compiler is PGI. This is required to detect AVX512 support in getarch.
|
||||||
ifeq ($(HOSTARCH), x86_64)
|
ifeq ($(HOSTARCH), x86_64)
|
||||||
ifeq ($(findstring pgcc,$(HOSTCC)),)
|
ifeq ($(findstring pgcc,$(HOSTCC))$(findstring nvc,$(HOSTCC)),)
|
||||||
GETARCH_FLAGS += -march=native
|
GETARCH_FLAGS += -march=native
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
@ -246,6 +254,22 @@ DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)"
|
||||||
ifndef TARGET_CORE
|
ifndef TARGET_CORE
|
||||||
include $(TOPDIR)/Makefile.conf
|
include $(TOPDIR)/Makefile.conf
|
||||||
else
|
else
|
||||||
|
HAVE_NEON=
|
||||||
|
HAVE_VFP=
|
||||||
|
HAVE_VFPV3=
|
||||||
|
HAVE_VFPV4=
|
||||||
|
HAVE_MMX=
|
||||||
|
HAVE_SSE=
|
||||||
|
HAVE_SSE2=
|
||||||
|
HAVE_SSE3=
|
||||||
|
HAVE_SSSE3=
|
||||||
|
HAVE_SSE4_1=
|
||||||
|
HAVE_SSE4_2=
|
||||||
|
HAVE_SSE4A=
|
||||||
|
HAVE_SSE5=
|
||||||
|
HAVE_AVX=
|
||||||
|
HAVE_AVX2=
|
||||||
|
HAVE_FMA3=
|
||||||
include $(TOPDIR)/Makefile_kernel.conf
|
include $(TOPDIR)/Makefile_kernel.conf
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -319,6 +343,7 @@ ifeq ($(GCCVERSIONGTEQ7),1)
|
||||||
else
|
else
|
||||||
GCCDUMPVERSION_PARAM := -dumpversion
|
GCCDUMPVERSION_PARAM := -dumpversion
|
||||||
endif
|
endif
|
||||||
|
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 1)
|
||||||
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2)
|
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2)
|
||||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
|
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
|
||||||
endif
|
endif
|
||||||
|
@ -600,6 +625,15 @@ DYNAMIC_CORE += THUNDERX2T99
|
||||||
DYNAMIC_CORE += TSV110
|
DYNAMIC_CORE += TSV110
|
||||||
DYNAMIC_CORE += EMAG8180
|
DYNAMIC_CORE += EMAG8180
|
||||||
DYNAMIC_CORE += THUNDERX3T110
|
DYNAMIC_CORE += THUNDERX3T110
|
||||||
|
ifdef DYNAMIC_LIST
|
||||||
|
override DYNAMIC_CORE = ARMV8 $(DYNAMIC_LIST)
|
||||||
|
XCCOMMON_OPT = -DDYNAMIC_LIST -DDYN_ARMV8
|
||||||
|
XCCOMMON_OPT += $(foreach dcore,$(DYNAMIC_LIST),-DDYN_$(dcore))
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(ARCH), mips64)
|
||||||
|
DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), zarch)
|
ifeq ($(ARCH), zarch)
|
||||||
|
@ -636,11 +670,13 @@ endif
|
||||||
endif # ARCH zarch
|
endif # ARCH zarch
|
||||||
|
|
||||||
ifeq ($(ARCH), power)
|
ifeq ($(ARCH), power)
|
||||||
|
ifneq ($(C_COMPILER), PGI)
|
||||||
DYNAMIC_CORE = POWER6
|
DYNAMIC_CORE = POWER6
|
||||||
DYNAMIC_CORE += POWER8
|
DYNAMIC_CORE += POWER8
|
||||||
ifneq ($(C_COMPILER), GCC)
|
ifneq ($(C_COMPILER), GCC)
|
||||||
DYNAMIC_CORE += POWER9
|
DYNAMIC_CORE += POWER9
|
||||||
DYNAMIC_CORE += POWER10
|
DYNAMIC_CORE += POWER10
|
||||||
|
CCOMMON_OPT += -DHAVE_P10_SUPPORT
|
||||||
endif
|
endif
|
||||||
ifeq ($(C_COMPILER), GCC)
|
ifeq ($(C_COMPILER), GCC)
|
||||||
ifeq ($(GCCVERSIONGT5), 1)
|
ifeq ($(GCCVERSIONGT5), 1)
|
||||||
|
@ -648,16 +684,23 @@ DYNAMIC_CORE += POWER9
|
||||||
else
|
else
|
||||||
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
|
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
|
||||||
endif
|
endif
|
||||||
ifeq ($(GCCVERSIONGTEQ11), 1)
|
LDVERSIONGTEQ35 := $(shell expr `$(CC) -Wl,--version 2> /dev/null | head -1 | cut -f2 -d "." | cut -f1 -d "-"` \>= 35)
|
||||||
|
ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11)
|
||||||
DYNAMIC_CORE += POWER10
|
DYNAMIC_CORE += POWER10
|
||||||
|
CCOMMON_OPT += -DHAVE_P10_SUPPORT
|
||||||
else ifeq ($(GCCVERSIONGTEQ10), 1)
|
else ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||||
ifeq ($(GCCMINORVERSIONGTEQ2), 1)
|
ifeq ($(GCCMINORVERSIONGTEQ2)$(LDVERSIONGTEQ35), 11)
|
||||||
DYNAMIC_CORE += POWER10
|
DYNAMIC_CORE += POWER10
|
||||||
|
CCOMMON_OPT += -DHAVE_P10_SUPPORT
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
$(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
|
$(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
else
|
||||||
|
DYNAMIC_CORE = POWER8
|
||||||
|
DYNAMIC_CORE += POWER9
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
|
# If DYNAMIC_CORE is not set, DYNAMIC_ARCH cannot do anything, so force it to empty
|
||||||
|
@ -724,7 +767,10 @@ endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(ARCH), riscv64)
|
||||||
|
NO_BINARY_MODE = 1
|
||||||
|
BINARY_DEFINED = 1
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -757,14 +803,9 @@ CCOMMON_OPT += -mabi=32
|
||||||
BINARY_DEFINED = 1
|
BINARY_DEFINED = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), LOONGSON3A)
|
ifeq ($(CORE), $(filter $(CORE),LOONGSON3R3 LOONGSON3R4))
|
||||||
CCOMMON_OPT += -march=mips64
|
CCOMMON_OPT += -march=loongson3a
|
||||||
FCOMMON_OPT += -march=mips64
|
FCOMMON_OPT += -march=loongson3a
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(CORE), LOONGSON3B)
|
|
||||||
CCOMMON_OPT += -march=mips64
|
|
||||||
FCOMMON_OPT += -march=mips64
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), MIPS24K)
|
ifeq ($(CORE), MIPS24K)
|
||||||
|
@ -806,7 +847,9 @@ endif
|
||||||
ifndef BINARY_DEFINED
|
ifndef BINARY_DEFINED
|
||||||
ifneq ($(OSNAME), AIX)
|
ifneq ($(OSNAME), AIX)
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
|
ifneq ($(ARCH), riscv64)
|
||||||
CCOMMON_OPT += -m64
|
CCOMMON_OPT += -m64
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
CCOMMON_OPT += -m32
|
CCOMMON_OPT += -m32
|
||||||
endif
|
endif
|
||||||
|
@ -816,9 +859,19 @@ endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(C_COMPILER), PGI)
|
ifeq ($(C_COMPILER), PGI)
|
||||||
|
PGCVERSIONGT20 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -d "." -f 1` \> 20)
|
||||||
|
PGCVERSIONGTEQ20 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -d "." -f 1` \>= 20)
|
||||||
|
PGCMINORVERSIONGE11 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -c 4-5` == 11)
|
||||||
|
PGCVERSIONCHECK := $(PGCVERSIONGT20)$(PGCVERSIONEQ20)$(PGCMINORVERSIONGE11)
|
||||||
|
ifeq ($(PGCVERSIONCHECK), $(filter $(PGCVERSIONCHECK), 110 111 011))
|
||||||
|
NEWPGI := 1
|
||||||
|
endif
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
ifeq ($(ARCH), x86_64)
|
ifeq ($(ARCH), x86_64)
|
||||||
CCOMMON_OPT += -tp p7-64 -D__MMX__ -Mnollvm
|
CCOMMON_OPT += -tp p7-64
|
||||||
|
ifneq ($(NEWPGI),1)
|
||||||
|
CCOMMON_OPT += -D__MMX__ -Mnollvm
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
ifeq ($(ARCH), power)
|
ifeq ($(ARCH), power)
|
||||||
ifeq ($(CORE), POWER8)
|
ifeq ($(CORE), POWER8)
|
||||||
|
@ -846,13 +899,25 @@ endif
|
||||||
# Fortran Compiler dependent settings
|
# Fortran Compiler dependent settings
|
||||||
#
|
#
|
||||||
|
|
||||||
|
ifeq ($(F_COMPILER), NAG)
|
||||||
|
FCOMMON_OPT += -dcfuns -recursive -ieee=full -w=obs -thread_safe
|
||||||
|
ifdef INTERFACE64
|
||||||
|
ifneq ($(INTERFACE64), 0)
|
||||||
|
FCOMMON_OPT += -i8
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
ifeq ($(USE_OPENMP), 1)
|
||||||
|
FCOMMON_OPT += -openmp
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(F_COMPILER), FLANG)
|
ifeq ($(F_COMPILER), FLANG)
|
||||||
CCOMMON_OPT += -DF_INTERFACE_FLANG
|
CCOMMON_OPT += -DF_INTERFACE_FLANG
|
||||||
FCOMMON_OPT += -Mrecursive -Kieee
|
FCOMMON_OPT += -Mrecursive -Kieee
|
||||||
ifeq ($(OSNAME), Linux)
|
ifeq ($(OSNAME), Linux)
|
||||||
ifeq ($(ARCH), x86_64)
|
ifeq ($(ARCH), x86_64)
|
||||||
FLANG_VENDOR := $(shell expr `$(FC) --version|cut -f 1 -d "."|head -1`)
|
FLANG_VENDOR := $(shell $(FC) --version|head -1 |cut -f 1 -d " ")
|
||||||
ifeq ($(FLANG_VENDOR),AOCC)
|
ifeq ($(FLANG_VENDOR), AMD)
|
||||||
FCOMMON_OPT += -fno-unroll-loops
|
FCOMMON_OPT += -fno-unroll-loops
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
@ -927,8 +992,10 @@ endif
|
||||||
else
|
else
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
ifneq ($(OSNAME), AIX)
|
ifneq ($(OSNAME), AIX)
|
||||||
|
ifneq ($(ARCH), riscv64)
|
||||||
FCOMMON_OPT += -m64
|
FCOMMON_OPT += -m64
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
ifdef INTERFACE64
|
ifdef INTERFACE64
|
||||||
ifneq ($(INTERFACE64), 0)
|
ifneq ($(INTERFACE64), 0)
|
||||||
FCOMMON_OPT += -fdefault-integer-8
|
FCOMMON_OPT += -fdefault-integer-8
|
||||||
|
@ -996,18 +1063,24 @@ ifeq ($(ARCH), x86_64)
|
||||||
FCOMMON_OPT += -tp p7-64
|
FCOMMON_OPT += -tp p7-64
|
||||||
else
|
else
|
||||||
ifeq ($(ARCH), power)
|
ifeq ($(ARCH), power)
|
||||||
|
ifeq ($(CORE), POWER6)
|
||||||
|
$(warning NVIDIA HPC compilers do not support POWER6.)
|
||||||
|
endif
|
||||||
ifeq ($(CORE), POWER8)
|
ifeq ($(CORE), POWER8)
|
||||||
FCOMMON_OPT += -tp pwr8
|
FCOMMON_OPT += -tp pwr8
|
||||||
endif
|
endif
|
||||||
ifeq ($(CORE), POWER9)
|
ifeq ($(CORE), POWER9)
|
||||||
FCOMMON_OPT += -tp pwr9
|
FCOMMON_OPT += -tp pwr9
|
||||||
endif
|
endif
|
||||||
|
ifeq ($(CORE), POWER10)
|
||||||
|
$(warning NVIDIA HPC compilers do not support POWER10.)
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
FCOMMON_OPT += -tp p7
|
FCOMMON_OPT += -tp p7
|
||||||
endif
|
endif
|
||||||
FCOMMON_OPT += -Mrecursive
|
FCOMMON_OPT += -Mrecursive -Kieee
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
FCOMMON_OPT += -mp
|
FCOMMON_OPT += -mp
|
||||||
endif
|
endif
|
||||||
|
@ -1044,11 +1117,11 @@ FCOMMON_OPT += -n32
|
||||||
else
|
else
|
||||||
FCOMMON_OPT += -n64
|
FCOMMON_OPT += -n64
|
||||||
endif
|
endif
|
||||||
ifeq ($(CORE), LOONGSON3A)
|
ifeq ($(CORE), LOONGSON3R3)
|
||||||
FCOMMON_OPT += -loongson3 -static
|
FCOMMON_OPT += -loongson3 -static
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), LOONGSON3B)
|
ifeq ($(CORE), LOONGSON3R4)
|
||||||
FCOMMON_OPT += -loongson3 -static
|
FCOMMON_OPT += -loongson3 -static
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -1074,11 +1147,11 @@ CCOMMON_OPT += -n32
|
||||||
else
|
else
|
||||||
CCOMMON_OPT += -n64
|
CCOMMON_OPT += -n64
|
||||||
endif
|
endif
|
||||||
ifeq ($(CORE), LOONGSON3A)
|
ifeq ($(CORE), LOONGSON3R3)
|
||||||
CCOMMON_OPT += -loongson3 -static
|
CCOMMON_OPT += -loongson3 -static
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), LOONGSON3B)
|
ifeq ($(CORE), LOONGSON3R4)
|
||||||
CCOMMON_OPT += -loongson3 -static
|
CCOMMON_OPT += -loongson3 -static
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -1097,16 +1170,25 @@ CCOMMON_OPT += -w
|
||||||
ifeq ($(ARCH), x86)
|
ifeq ($(ARCH), x86)
|
||||||
CCOMMON_OPT += -m32
|
CCOMMON_OPT += -m32
|
||||||
else
|
else
|
||||||
FCOMMON_OPT += -m64
|
ifdef BINARY64
|
||||||
|
CCOMMON_OPT += -m64
|
||||||
|
else
|
||||||
|
CCOMMON_OPT += -m32
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(F_COMPILER), SUN)
|
ifeq ($(F_COMPILER), SUN)
|
||||||
CCOMMON_OPT += -DF_INTERFACE_SUN
|
CCOMMON_OPT += -DF_INTERFACE_SUN
|
||||||
|
FCOMMON_OPT += -ftrap=%none -xrecursive
|
||||||
ifeq ($(ARCH), x86)
|
ifeq ($(ARCH), x86)
|
||||||
FCOMMON_OPT += -m32
|
FCOMMON_OPT += -m32
|
||||||
else
|
else
|
||||||
|
ifdef BINARY64
|
||||||
FCOMMON_OPT += -m64
|
FCOMMON_OPT += -m64
|
||||||
|
else
|
||||||
|
FCOMMON_OPT += -m32
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
FCOMMON_OPT += -xopenmp=parallel
|
FCOMMON_OPT += -xopenmp=parallel
|
||||||
|
@ -1137,6 +1219,8 @@ CCOMMON_OPT += -fPIC
|
||||||
endif
|
endif
|
||||||
ifeq ($(F_COMPILER), SUN)
|
ifeq ($(F_COMPILER), SUN)
|
||||||
FCOMMON_OPT += -pic
|
FCOMMON_OPT += -pic
|
||||||
|
else ifeq ($(F_COMPILER), NAG)
|
||||||
|
FCOMMON_OPT += -PIC
|
||||||
else
|
else
|
||||||
FCOMMON_OPT += -fPIC
|
FCOMMON_OPT += -fPIC
|
||||||
endif
|
endif
|
||||||
|
@ -1180,10 +1264,8 @@ ifdef SMP
|
||||||
CCOMMON_OPT += -DSMP_SERVER
|
CCOMMON_OPT += -DSMP_SERVER
|
||||||
|
|
||||||
ifeq ($(ARCH), mips64)
|
ifeq ($(ARCH), mips64)
|
||||||
ifneq ($(CORE), LOONGSON3B)
|
|
||||||
USE_SIMPLE_THREADED_LEVEL3 = 1
|
USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||||
|
@ -1216,6 +1298,10 @@ CCOMMON_OPT += -DUSE_PAPI
|
||||||
EXTRALIB += -lpapi -lperfctr
|
EXTRALIB += -lpapi -lperfctr
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifdef BUFFERSIZE
|
||||||
|
CCOMMON_OPT += -DBUFFERSIZE=$(BUFFERSIZE)
|
||||||
|
endif
|
||||||
|
|
||||||
ifdef DYNAMIC_THREADS
|
ifdef DYNAMIC_THREADS
|
||||||
CCOMMON_OPT += -DDYNAMIC_THREADS
|
CCOMMON_OPT += -DDYNAMIC_THREADS
|
||||||
endif
|
endif
|
||||||
|
@ -1258,10 +1344,14 @@ ifndef SYMBOLSUFFIX
|
||||||
SYMBOLSUFFIX =
|
SYMBOLSUFFIX =
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifndef LIBSONAMEBASE
|
||||||
|
LIBSONAMEBASE = openblas
|
||||||
|
endif
|
||||||
|
|
||||||
ifndef LIBNAMESUFFIX
|
ifndef LIBNAMESUFFIX
|
||||||
LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
|
LIBNAMEBASE = $(SYMBOLPREFIX)$(LIBSONAMEBASE)$(SYMBOLSUFFIX)
|
||||||
else
|
else
|
||||||
LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
|
LIBNAMEBASE = $(SYMBOLPREFIX)$(LIBSONAMEBASE)$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), CYGWIN_NT)
|
ifeq ($(OSNAME), CYGWIN_NT)
|
||||||
|
@ -1275,8 +1365,10 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
|
||||||
include $(TOPDIR)/Makefile.$(ARCH)
|
include $(TOPDIR)/Makefile.$(ARCH)
|
||||||
|
|
||||||
ifneq ($(C_COMPILER), PGI)
|
ifneq ($(C_COMPILER), PGI)
|
||||||
|
ifneq ($(C_COMPILER), SUN)
|
||||||
CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME
|
CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"
|
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"
|
||||||
|
|
||||||
ifeq ($(CORE), PPC440)
|
ifeq ($(CORE), PPC440)
|
||||||
|
@ -1293,11 +1385,9 @@ endif
|
||||||
|
|
||||||
ifneq ($(ARCH), x86_64)
|
ifneq ($(ARCH), x86_64)
|
||||||
ifneq ($(ARCH), x86)
|
ifneq ($(ARCH), x86)
|
||||||
ifneq ($(CORE), LOONGSON3B)
|
|
||||||
NO_AFFINITY = 1
|
NO_AFFINITY = 1
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef NO_AFFINITY
|
ifdef NO_AFFINITY
|
||||||
ifeq ($(NO_AFFINITY), 0)
|
ifeq ($(NO_AFFINITY), 0)
|
||||||
|
@ -1389,6 +1479,10 @@ LAPACK_FFLAGS := $(FFLAGS)
|
||||||
LAPACK_FPFLAGS := $(FPFLAGS)
|
LAPACK_FPFLAGS := $(FPFLAGS)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(F_COMPILER),NAG)
|
||||||
|
LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
|
||||||
|
endif
|
||||||
|
|
||||||
LAPACK_CFLAGS = $(CFLAGS)
|
LAPACK_CFLAGS = $(CFLAGS)
|
||||||
LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H
|
LAPACK_CFLAGS += -DHAVE_LAPACK_CONFIG_H
|
||||||
ifdef INTERFACE64
|
ifdef INTERFACE64
|
||||||
|
@ -1511,6 +1605,8 @@ export HAVE_SSE4_2
|
||||||
export HAVE_SSE4A
|
export HAVE_SSE4A
|
||||||
export HAVE_SSE5
|
export HAVE_SSE5
|
||||||
export HAVE_AVX
|
export HAVE_AVX
|
||||||
|
export HAVE_AVX2
|
||||||
|
export HAVE_FMA3
|
||||||
export HAVE_VFP
|
export HAVE_VFP
|
||||||
export HAVE_VFPV3
|
export HAVE_VFPV3
|
||||||
export HAVE_VFPV4
|
export HAVE_VFPV4
|
||||||
|
@ -1521,6 +1617,7 @@ export KERNELDIR
|
||||||
export FUNCTION_PROFILE
|
export FUNCTION_PROFILE
|
||||||
export TARGET_CORE
|
export TARGET_CORE
|
||||||
export NO_AVX512
|
export NO_AVX512
|
||||||
|
export NO_AVX2
|
||||||
export BUILD_BFLOAT16
|
export BUILD_BFLOAT16
|
||||||
|
|
||||||
export SBGEMM_UNROLL_M
|
export SBGEMM_UNROLL_M
|
||||||
|
|
33
Makefile.x86
33
Makefile.x86
|
@ -1,5 +1,21 @@
|
||||||
# COMPILER_PREFIX = mingw32-
|
# COMPILER_PREFIX = mingw32-
|
||||||
|
|
||||||
|
ifndef DYNAMIC_ARCH
|
||||||
|
ADD_CPUFLAGS = 1
|
||||||
|
else
|
||||||
|
ifdef TARGET_CORE
|
||||||
|
ADD_CPUFLAGS = 1
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef ADD_CPUFLAGS
|
||||||
|
ifdef HAVE_SSE
|
||||||
|
CCOMMON_OPT += -msse
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
|
FCOMMON_OPT += -msse
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), Interix)
|
ifeq ($(OSNAME), Interix)
|
||||||
ARFLAGS = -m x86
|
ARFLAGS = -m x86
|
||||||
|
@ -54,3 +70,20 @@ LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm
|
||||||
else
|
else
|
||||||
LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm
|
LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm
|
||||||
endif
|
endif
|
||||||
|
ifdef HAVE_SSE2
|
||||||
|
CCOMMON_OPT += -msse2
|
||||||
|
FCOMMON_OPT += -msse2
|
||||||
|
endif
|
||||||
|
ifdef HAVE_SSE3
|
||||||
|
CCOMMON_OPT += -msse3
|
||||||
|
FCOMMON_OPT += -msse3
|
||||||
|
ifdef HAVE_SSSE3
|
||||||
|
CCOMMON_OPT += -mssse3
|
||||||
|
FCOMMON_OPT += -mssse3
|
||||||
|
endif
|
||||||
|
ifdef HAVE_SSE4_1
|
||||||
|
CCOMMON_OPT += -msse4.1
|
||||||
|
FCOMMON_OPT += -msse4.1
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
|
@ -8,22 +8,57 @@ endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef HAVE_SSE3
|
|
||||||
ifndef DYNAMIC_ARCH
|
ifndef DYNAMIC_ARCH
|
||||||
|
ADD_CPUFLAGS = 1
|
||||||
|
else
|
||||||
|
ifdef TARGET_CORE
|
||||||
|
ADD_CPUFLAGS = 1
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef ADD_CPUFLAGS
|
||||||
|
ifdef HAVE_SSE3
|
||||||
CCOMMON_OPT += -msse3
|
CCOMMON_OPT += -msse3
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -msse3
|
FCOMMON_OPT += -msse3
|
||||||
|
endif
|
||||||
|
endif
|
||||||
ifdef HAVE_SSSE3
|
ifdef HAVE_SSSE3
|
||||||
CCOMMON_OPT += -mssse3
|
CCOMMON_OPT += -mssse3
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -mssse3
|
FCOMMON_OPT += -mssse3
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
ifdef HAVE_SSE4_1
|
||||||
|
CCOMMON_OPT += -msse4.1
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
|
FCOMMON_OPT += -msse4.1
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
ifndef OLDGCC
|
||||||
|
ifdef HAVE_AVX
|
||||||
|
CCOMMON_OPT += -mavx
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
|
FCOMMON_OPT += -mavx
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
ifndef NO_AVX2
|
||||||
|
ifdef HAVE_AVX2
|
||||||
|
CCOMMON_OPT += -mavx2
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
|
FCOMMON_OPT += -mavx2
|
||||||
|
endif
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), SKYLAKEX)
|
ifeq ($(CORE), SKYLAKEX)
|
||||||
ifndef DYNAMIC_ARCH
|
|
||||||
ifndef NO_AVX512
|
ifndef NO_AVX512
|
||||||
CCOMMON_OPT += -march=skylake-avx512
|
CCOMMON_OPT += -march=skylake-avx512
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=skylake-avx512
|
FCOMMON_OPT += -march=skylake-avx512
|
||||||
|
endif
|
||||||
ifeq ($(OSNAME), CYGWIN_NT)
|
ifeq ($(OSNAME), CYGWIN_NT)
|
||||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||||
|
@ -36,20 +71,18 @@ endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(CORE), COOPERLAKE)
|
ifeq ($(CORE), COOPERLAKE)
|
||||||
ifndef DYNAMIC_ARCH
|
|
||||||
ifndef NO_AVX512
|
ifndef NO_AVX512
|
||||||
ifeq ($(C_COMPILER), GCC)
|
ifeq ($(C_COMPILER), GCC)
|
||||||
# cooperlake support was added in 10.1
|
# cooperlake support was added in 10.1
|
||||||
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
|
|
||||||
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 1)
|
|
||||||
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
|
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
|
||||||
CCOMMON_OPT += -march=cooperlake
|
CCOMMON_OPT += -march=cooperlake
|
||||||
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=cooperlake
|
FCOMMON_OPT += -march=cooperlake
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
ifeq ($(OSNAME), CYGWIN_NT)
|
ifeq ($(OSNAME), CYGWIN_NT)
|
||||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||||
|
@ -62,31 +95,38 @@ endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE))
|
ifdef HAVE_AVX2
|
||||||
ifndef DYNAMIC_ARCH
|
|
||||||
ifndef NO_AVX2
|
ifndef NO_AVX2
|
||||||
ifeq ($(C_COMPILER), GCC)
|
ifeq ($(C_COMPILER), GCC)
|
||||||
# AVX2 support was added in 4.7.0
|
# AVX2 support was added in 4.7.0
|
||||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
|
||||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||||
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
|
CCOMMON_OPT += -mavx2
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
ifeq ($(C_COMPILER), CLANG)
|
||||||
CCOMMON_OPT += -mavx2
|
CCOMMON_OPT += -mavx2
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
ifeq ($(F_COMPILER), GFORTRAN)
|
ifeq ($(F_COMPILER), GFORTRAN)
|
||||||
# AVX2 support was added in 4.7.0
|
# AVX2 support was added in 4.7.0
|
||||||
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
|
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||||
|
GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5)
|
||||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
|
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||||
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
|
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
|
||||||
|
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||||
|
FCOMMON_OPT += -mavx2
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
ifeq ($(F_COMPILER), FLANG)
|
||||||
FCOMMON_OPT += -mavx2
|
FCOMMON_OPT += -mavx2
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
ifeq ($(OSNAME), Interix)
|
ifeq ($(OSNAME), Interix)
|
||||||
|
|
16
README.md
16
README.md
|
@ -13,10 +13,14 @@ Drone CI: [ library based on GotoBLAS2 1.13 BSD version.
|
||||||
|
|
||||||
Please read the documentation on the OpenBLAS wiki pages: <https://github.com/xianyi/OpenBLAS/wiki>.
|
Please read the documentation on the OpenBLAS wiki pages: <https://github.com/xianyi/OpenBLAS/wiki>.
|
||||||
|
|
||||||
|
For a general introduction to the BLAS routines, please refer to the extensive documentation of their reference implementation hosted at netlib:
|
||||||
|
<https://www.netlib.org/blas>. On that site you will likewise find documentation for the reference implementation of the higher-level library LAPACK - the **L**inear **A**lgebra **Pack**age that comes included with OpenBLAS. If you are looking for a general primer or refresher on Linear Algebra, the set of six
|
||||||
|
20-minute lecture videos by Prof. Gilbert Strang on either MIT OpenCourseWare <https://ocw.mit.edu/resources/res-18-010-a-2020-vision-of-linear-algebra-spring-2020/> or Youtube <https://www.youtube.com/playlist?list=PLUl4u3cNGP61iQEFiWLE21EJCxwmWvvek> may be helpful.
|
||||||
|
|
||||||
## Binary Packages
|
## Binary Packages
|
||||||
|
|
||||||
We provide official binary packages for the following platform:
|
We provide official binary packages for the following platform:
|
||||||
|
@ -172,6 +176,13 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
|
||||||
- **Z13**: Optimized Level-3 BLAS and Level-1,2
|
- **Z13**: Optimized Level-3 BLAS and Level-1,2
|
||||||
- **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2
|
- **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2
|
||||||
|
|
||||||
|
#### RISC-V
|
||||||
|
|
||||||
|
- **C910V**: Optimized Level-3 BLAS (real) and Level-1,2 by RISC-V Vector extension 0.7.1.
|
||||||
|
```sh
|
||||||
|
make HOSTCC=gcc TARGET=C910V CC=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran
|
||||||
|
```
|
||||||
|
|
||||||
### Support for multiple targets in a single library
|
### Support for multiple targets in a single library
|
||||||
|
|
||||||
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.
|
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.
|
||||||
|
@ -201,7 +212,8 @@ Please note that it is not possible to combine support for different architectur
|
||||||
- **Android**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
|
- **Android**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
|
||||||
- **AIX**: Supported on PPC up to POWER8
|
- **AIX**: Supported on PPC up to POWER8
|
||||||
- **Haiku**: Supported by the community. We don't actively test the library on this OS.
|
- **Haiku**: Supported by the community. We don't actively test the library on this OS.
|
||||||
- **SunOS**: Supported by the community. We don't actively test the library on this OS:
|
- **SunOS**: Supported by the community. We don't actively test the library on this OS.
|
||||||
|
- **Cortex-M**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-on-Cortex-M>.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
|
|
|
@ -104,3 +104,8 @@ VORTEX
|
||||||
ZARCH_GENERIC
|
ZARCH_GENERIC
|
||||||
Z13
|
Z13
|
||||||
Z14
|
Z14
|
||||||
|
|
||||||
|
10.RISC-V 64:
|
||||||
|
RISCV64_GENERIC
|
||||||
|
C910V
|
||||||
|
|
||||||
|
|
20
appveyor.yml
20
appveyor.yml
|
@ -30,10 +30,10 @@ environment:
|
||||||
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
|
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
|
||||||
matrix:
|
matrix:
|
||||||
- COMPILER: clang-cl
|
- COMPILER: clang-cl
|
||||||
WITH_FORTRAN: yes
|
WITH_FORTRAN: ON
|
||||||
- COMPILER: clang-cl
|
- COMPILER: clang-cl
|
||||||
DYNAMIC_ARCH: ON
|
DYNAMIC_ARCH: ON
|
||||||
WITH_FORTRAN: no
|
WITH_FORTRAN: OFF
|
||||||
- COMPILER: cl
|
- COMPILER: cl
|
||||||
- COMPILER: MinGW64-gcc-7.2.0-mingw
|
- COMPILER: MinGW64-gcc-7.2.0-mingw
|
||||||
DYNAMIC_ARCH: OFF
|
DYNAMIC_ARCH: OFF
|
||||||
|
@ -47,12 +47,7 @@ environment:
|
||||||
install:
|
install:
|
||||||
- if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
|
- if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
|
||||||
- if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
|
- if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
|
||||||
- if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake
|
- if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake ninja flang=11.0.1
|
||||||
|
|
||||||
- if [%WITH_FORTRAN%]==[no] conda install --yes --quiet ninja
|
|
||||||
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet -c isuruf kitware-ninja
|
|
||||||
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flang
|
|
||||||
|
|
||||||
- if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
|
- if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
|
||||||
- if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
|
- if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
|
||||||
- if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
|
- if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
|
||||||
|
@ -68,15 +63,14 @@ before_build:
|
||||||
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 ..
|
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 ..
|
||||||
- if [%COMPILER%]==[MinGW-gcc-6.3.0-32] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
|
- if [%COMPILER%]==[MinGW-gcc-6.3.0-32] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
|
||||||
- if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
|
- if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
|
||||||
- if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON ..
|
- if [%WITH_FORTRAN%]==[OFF] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON ..
|
||||||
- if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
|
- if [%WITH_FORTRAN%]==[ON] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
|
||||||
|
- if [%USE_OPENMP%]==[ON] cmake -DUSE_OPENMP=ON ..
|
||||||
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' ..
|
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' ..
|
||||||
|
|
||||||
build_script:
|
build_script:
|
||||||
- cmake --build .
|
- cmake --build .
|
||||||
|
|
||||||
test_script:
|
test_script:
|
||||||
- echo Running Test
|
- ctest -j2
|
||||||
- cd utest
|
|
||||||
- openblas_utest
|
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,14 @@ trigger:
|
||||||
branches:
|
branches:
|
||||||
include:
|
include:
|
||||||
- develop
|
- develop
|
||||||
|
resources:
|
||||||
|
containers:
|
||||||
|
- container: oneapi-hpckit
|
||||||
|
image: intel/oneapi-hpckit:latest
|
||||||
|
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
|
||||||
|
- container: oneapi-basekit
|
||||||
|
image: intel/oneapi-basekit:latest
|
||||||
|
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
# manylinux1 is useful to test because the
|
# manylinux1 is useful to test because the
|
||||||
|
@ -68,4 +76,64 @@ jobs:
|
||||||
dir
|
dir
|
||||||
openblas_utest.exe
|
openblas_utest.exe
|
||||||
|
|
||||||
|
- job: OSX_OpenMP
|
||||||
|
pool:
|
||||||
|
vmImage: 'macOS-10.15'
|
||||||
|
steps:
|
||||||
|
- script: |
|
||||||
|
brew update
|
||||||
|
make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-10 FC=gfortran-10
|
||||||
|
|
||||||
|
- job: OSX_GCC_Nothreads
|
||||||
|
pool:
|
||||||
|
vmImage: 'macOS-10.15'
|
||||||
|
steps:
|
||||||
|
- script: |
|
||||||
|
brew update
|
||||||
|
make USE_THREADS=0 CC=gcc-10 FC=gfortran-10
|
||||||
|
|
||||||
|
- job: OSX_OpenMP_Clang
|
||||||
|
pool:
|
||||||
|
vmImage: 'macOS-10.15'
|
||||||
|
variables:
|
||||||
|
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||||
|
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||||
|
steps:
|
||||||
|
- script: |
|
||||||
|
brew update
|
||||||
|
brew install llvm libomp
|
||||||
|
make TARGET=CORE2 USE_OPENMP=1 INTERFACE64=1 DYNAMIC_ARCH=1 CC=/usr/local/opt/llvm/bin/clang FC=gfortran-10
|
||||||
|
|
||||||
|
- job: OSX_Ifort_Clang
|
||||||
|
pool:
|
||||||
|
vmImage: 'macOS-10.15'
|
||||||
|
variables:
|
||||||
|
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||||
|
MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17643/m_HPCKit_p_2021.2.0.2903_offline.dmg
|
||||||
|
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||||
|
MACOS_FORTRAN_COMPONENTS: intel.oneapi.mac.ifort-compiler
|
||||||
|
steps:
|
||||||
|
- script: |
|
||||||
|
brew update
|
||||||
|
brew install llvm libomp
|
||||||
|
sudo mkdir -p /opt/intel
|
||||||
|
sudo chown $USER /opt/intel
|
||||||
|
displayName: prepare for cache restore
|
||||||
|
- task: Cache@2
|
||||||
|
inputs:
|
||||||
|
path: /opt/intel/oneapi
|
||||||
|
key: '"install" | "$(MACOS_HPCKIT_URL)" | "$(MACOS_FORTRAN_COMPONENTS)"'
|
||||||
|
cacheHitVar: CACHE_RESTORED
|
||||||
|
- script: |
|
||||||
|
curl --output webimage.dmg --url $(MACOS_HPCKIT_URL) --retry 5 --retry-delay 5
|
||||||
|
hdiutil attach webimage.dmg
|
||||||
|
sudo /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)"/bootstrapper.app/Contents/MacOS/bootstrapper -s --action install --components="$(MACOS_FORTRAN_COMPONENTS)" --eula=accept --continue-with-optional-error=yes --log-dir=.
|
||||||
|
installer_exit_code=$?
|
||||||
|
hdiutil detach /Volumes/"$(basename "$URL" .dmg)" -quiet
|
||||||
|
exit $installer_exit_code
|
||||||
|
displayName: install
|
||||||
|
condition: ne(variables.CACHE_RESTORED, 'true')
|
||||||
|
- script: |
|
||||||
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
make CC=/usr/local/opt/llvm/bin/clang FC=ifort
|
||||||
|
|
||||||
|
|
170
benchmark/amax.c
170
benchmark/amax.c
|
@ -25,125 +25,73 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef AMAX
|
#undef AMAX
|
||||||
|
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define AMAX BLASFUNC(dzamax)
|
#define AMAX BLASFUNC(dzamax)
|
||||||
#else
|
#else
|
||||||
#define AMAX BLASFUNC(scamax)
|
#define AMAX BLASFUNC(scamax)
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define AMAX BLASFUNC(damax)
|
#define AMAX BLASFUNC(damax)
|
||||||
#else
|
#else
|
||||||
#define AMAX BLASFUNC(samax)
|
#define AMAX BLASFUNC(samax)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
blasint m, i;
|
blasint m, i;
|
||||||
blasint inc_x=1;
|
blasint inc_x = 1;
|
||||||
int loops = 1;
|
int loops = 1;
|
||||||
int l;
|
int l;
|
||||||
char *p;
|
char *p;
|
||||||
|
|
||||||
|
int from = 1;
|
||||||
|
int to = 200;
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
int from = 1;
|
double time1, timeg;
|
||||||
int to = 200;
|
|
||||||
int step = 1;
|
|
||||||
|
|
||||||
struct timeval start, stop;
|
argc--;
|
||||||
double time1,timeg;
|
argv++;
|
||||||
|
|
||||||
argc--;argv++;
|
if (argc > 0)
|
||||||
|
{
|
||||||
|
from = atol(*argv);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
if (argc > 0)
|
||||||
|
{
|
||||||
|
to = MAX(atol(*argv), from);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
if (argc > 0)
|
||||||
|
{
|
||||||
|
step = atol(*argv);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
|
||||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
loops = atoi(p);
|
||||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
if ((p = getenv("OPENBLAS_INCX")))
|
||||||
|
inc_x = atoi(p);
|
||||||
|
|
||||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
|
||||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
|
||||||
|
|
||||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
|
||||||
|
{
|
||||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
fprintf(stderr, "Out of Memory!!\n");
|
||||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __linux
|
#ifdef __linux
|
||||||
|
@ -152,37 +100,31 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
fprintf(stderr, " SIZE Flops\n");
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
for(m = from; m <= to; m += step)
|
for (m = from; m <= to; m += step)
|
||||||
{
|
{
|
||||||
|
|
||||||
timeg=0;
|
timeg = 0;
|
||||||
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
fprintf(stderr, " %6d : ", (int)m);
|
for (l = 0; l < loops; l++)
|
||||||
|
{
|
||||||
|
|
||||||
|
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
|
||||||
|
{
|
||||||
|
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
for (l=0; l<loops; l++)
|
begin();
|
||||||
{
|
AMAX(&m, x, &inc_x);
|
||||||
|
end();
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
timeg += getsec();
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
|
||||||
}
|
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
|
||||||
AMAX (&m, x, &inc_x);
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops %10.6f sec\n",
|
" %10.2f MFlops %10.6f sec\n",
|
||||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
167
benchmark/amin.c
167
benchmark/amin.c
|
@ -25,124 +25,73 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef AMIN
|
#undef AMIN
|
||||||
|
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define AMIN BLASFUNC(dzamin)
|
#define AMIN BLASFUNC(dzamin)
|
||||||
#else
|
#else
|
||||||
#define AMIN BLASFUNC(scamin)
|
#define AMIN BLASFUNC(scamin)
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define AMIN BLASFUNC(damin)
|
#define AMIN BLASFUNC(damin)
|
||||||
#else
|
#else
|
||||||
#define AMIN BLASFUNC(samin)
|
#define AMIN BLASFUNC(samin)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
blasint m, i;
|
blasint m, i;
|
||||||
blasint inc_x=1;
|
blasint inc_x = 1;
|
||||||
int loops = 1;
|
int loops = 1;
|
||||||
int l;
|
int l;
|
||||||
char *p;
|
char *p;
|
||||||
|
|
||||||
int from = 1;
|
int from = 1;
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
double time1, timeg;
|
||||||
double time1,timeg;
|
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;
|
||||||
|
argv++;
|
||||||
|
|
||||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
if (argc > 0)
|
||||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
{
|
||||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
from = atol(*argv);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
if (argc > 0)
|
||||||
|
{
|
||||||
|
to = MAX(atol(*argv), from);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
if (argc > 0)
|
||||||
|
{
|
||||||
|
step = atol(*argv);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
|
||||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX")))
|
||||||
|
inc_x = atoi(p);
|
||||||
|
|
||||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
|
||||||
|
|
||||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
|
||||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
{
|
||||||
|
fprintf(stderr, "Out of Memory!!\n");
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __linux
|
#ifdef __linux
|
||||||
|
@ -151,39 +100,35 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
fprintf(stderr, " SIZE Flops\n");
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
for(m = from; m <= to; m += step)
|
for (m = from; m <= to; m += step)
|
||||||
{
|
{
|
||||||
|
|
||||||
timeg=0;
|
timeg = 0;
|
||||||
|
|
||||||
fprintf(stderr, " %6d : ", (int)m);
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
|
for (l = 0; l < loops; l++)
|
||||||
|
{
|
||||||
|
|
||||||
for (l=0; l<loops; l++)
|
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
|
||||||
{
|
{
|
||||||
|
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
begin();
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
|
||||||
}
|
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
AMIN(&m, x, &inc_x);
|
||||||
|
|
||||||
AMIN (&m, x, &inc_x);
|
end();
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
|
timeg += getsec();
|
||||||
}
|
}
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops %10.6f sec\n",
|
" %10.2f MFlops %10.6f sec\n",
|
||||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
184
benchmark/asum.c
184
benchmark/asum.c
|
@ -25,132 +25,74 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef ASUM
|
#undef ASUM
|
||||||
|
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define ASUM BLASFUNC(dzasum)
|
#define ASUM BLASFUNC(dzasum)
|
||||||
#else
|
#else
|
||||||
#define ASUM BLASFUNC(scasum)
|
#define ASUM BLASFUNC(scasum)
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define ASUM BLASFUNC(dasum)
|
#define ASUM BLASFUNC(dasum)
|
||||||
#else
|
#else
|
||||||
#define ASUM BLASFUNC(sasum)
|
#define ASUM BLASFUNC(sasum)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
FLOAT result;
|
FLOAT result;
|
||||||
blasint m, i;
|
blasint m, i;
|
||||||
blasint inc_x=1;
|
blasint inc_x = 1;
|
||||||
int loops = 1;
|
int loops = 1;
|
||||||
int l;
|
int l;
|
||||||
char *p;
|
char *p;
|
||||||
|
|
||||||
int from = 1;
|
int from = 1;
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
|
||||||
#else
|
|
||||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
|
||||||
double time1, timeg;
|
double time1, timeg;
|
||||||
#endif
|
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;
|
||||||
|
argv++;
|
||||||
|
|
||||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
if (argc > 0)
|
||||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
{
|
||||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
from = atol(*argv);
|
||||||
|
argc--;
|
||||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
argv++;
|
||||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
}
|
||||||
|
if (argc > 0)
|
||||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
{
|
||||||
|
to = MAX(atol(*argv), from);
|
||||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
argc--;
|
||||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
argv++;
|
||||||
|
}
|
||||||
|
if (argc > 0)
|
||||||
|
{
|
||||||
|
step = atol(*argv);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||||
|
loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX")))
|
||||||
|
inc_x = atoi(p);
|
||||||
|
|
||||||
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
|
||||||
|
|
||||||
|
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Out of Memory!!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __linux
|
#ifdef __linux
|
||||||
srandom(getpid());
|
srandom(getpid());
|
||||||
|
@ -158,45 +100,33 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
fprintf(stderr, " SIZE Flops\n");
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
for(m = from; m <= to; m += step)
|
for (m = from; m <= to; m += step)
|
||||||
{
|
{
|
||||||
|
|
||||||
timeg=0;
|
timeg = 0;
|
||||||
|
|
||||||
fprintf(stderr, " %6d : ", (int)m);
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
for (l=0; l<loops; l++)
|
for (l = 0; l < loops; l++)
|
||||||
{
|
{
|
||||||
|
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
|
||||||
}
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
|
||||||
#else
|
|
||||||
clock_gettime(CLOCK_REALTIME, &start);
|
|
||||||
#endif
|
|
||||||
result = ASUM (&m, x, &inc_x);
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
|
||||||
clock_gettime(CLOCK_REALTIME, &stop);
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
#else
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
|
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
|
||||||
|
{
|
||||||
|
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
begin();
|
||||||
|
result = ASUM(&m, x, &inc_x);
|
||||||
|
end();
|
||||||
|
timeg += getsec();
|
||||||
}
|
}
|
||||||
if (loops >1)
|
if (loops > 1)
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
|
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
|
||||||
#else
|
#else
|
||||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
|
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef AXPBY
|
#undef AXPBY
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -129,7 +58,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -176,16 +104,10 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for (l=0; l<loops; l++)
|
for (l=0; l<loops; l++)
|
||||||
{
|
{
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y );
|
AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y );
|
||||||
|
end();
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
timeg += getsec();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef AXPY
|
#undef AXPY
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -127,8 +56,6 @@ int main(int argc, char *argv[]){
|
||||||
int from = 1;
|
int from = 1;
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timespec start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -175,13 +102,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
clock_gettime( CLOCK_REALTIME, &start);
|
begin();
|
||||||
|
|
||||||
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
|
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
|
||||||
|
|
||||||
clock_gettime( CLOCK_REALTIME, &stop);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,116 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
#ifdef __CYGWIN32__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
#include <mach/mach_time.h>
|
||||||
|
#endif
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__)
|
||||||
|
|
||||||
|
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||||
|
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int gettimeofday(struct timeval *tv, void *tz){
|
||||||
|
|
||||||
|
FILETIME ft;
|
||||||
|
unsigned __int64 tmpres = 0;
|
||||||
|
static int tzflag;
|
||||||
|
|
||||||
|
if (NULL != tv)
|
||||||
|
{
|
||||||
|
GetSystemTimeAsFileTime(&ft);
|
||||||
|
|
||||||
|
tmpres |= ft.dwHighDateTime;
|
||||||
|
tmpres <<= 32;
|
||||||
|
tmpres |= ft.dwLowDateTime;
|
||||||
|
|
||||||
|
/*converting file time to unix epoch*/
|
||||||
|
tmpres /= 10; /*convert into microseconds*/
|
||||||
|
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||||
|
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||||
|
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||||
|
|
||||||
|
static void *huge_malloc(BLASLONG size){
|
||||||
|
int shmid;
|
||||||
|
void *address;
|
||||||
|
|
||||||
|
#ifndef SHM_HUGETLB
|
||||||
|
#define SHM_HUGETLB 04000
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if ((shmid =shmget(IPC_PRIVATE,
|
||||||
|
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||||
|
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||||
|
printf( "Memory allocation failed(shmget).\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
address = shmat(shmid, NULL, SHM_RND);
|
||||||
|
|
||||||
|
if ((BLASLONG)address == -1){
|
||||||
|
printf( "Memory allocation failed(shmat).\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
shmctl(shmid, IPC_RMID, 0);
|
||||||
|
|
||||||
|
return address;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define malloc huge_malloc
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||||
|
struct timeval start, stop;
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
mach_timebase_info_data_t info;
|
||||||
|
uint64_t start = 0, stop = 0;
|
||||||
|
#else
|
||||||
|
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
||||||
|
#endif
|
||||||
|
|
||||||
|
double getsec()
|
||||||
|
{
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||||
|
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
mach_timebase_info(&info);
|
||||||
|
return (double)(((stop - start) * info.numer)/info.denom) * 1.e-9;
|
||||||
|
#else
|
||||||
|
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void begin() {
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||||
|
gettimeofday( &start, (struct timezone *)0);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
start = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
|
||||||
|
#else
|
||||||
|
clock_gettime(CLOCK_REALTIME, &start);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void end() {
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||||
|
gettimeofday( &stop, (struct timezone *)0);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
stop = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
|
||||||
|
#else
|
||||||
|
clock_gettime(CLOCK_REALTIME, &stop);
|
||||||
|
#endif
|
||||||
|
}
|
|
@ -36,12 +36,7 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
double fabs(double);
|
double fabs(double);
|
||||||
|
|
||||||
|
@ -71,41 +66,6 @@ double fabs(double);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
static __inline double getmflops(int ratio, int m, double secs){
|
static __inline double getmflops(int ratio, int m, double secs){
|
||||||
|
|
||||||
double mm = (double)m;
|
double mm = (double)m;
|
||||||
|
@ -145,7 +105,6 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT maxerr;
|
FLOAT maxerr;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -220,20 +179,19 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
POTRF(uplo[uplos], &m, b, &m, &info);
|
POTRF(uplo[uplos], &m, b, &m, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info != 0) {
|
if (info != 0) {
|
||||||
fprintf(stderr, "Info = %d\n", info);
|
fprintf(stderr, "Info = %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
maxerr = 0.;
|
|
||||||
|
|
||||||
if (!(uplos & 1)) {
|
if (!(uplos & 1)) {
|
||||||
for (j = 0; j < m; j++) {
|
for (j = 0; j < m; j++) {
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef COPY
|
#undef COPY
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -128,11 +57,9 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1 = 0.0, timeg = 0.0;
|
double time1 = 0.0, timeg = 0.0;
|
||||||
long nanos = 0;
|
long nanos = 0;
|
||||||
time_t seconds = 0;
|
time_t seconds = 0;
|
||||||
struct timespec time_start = { 0, 0 }, time_end = { 0, 0 };
|
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
|
||||||
|
@ -176,15 +103,10 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for (l=0; l<loops; l++)
|
for (l=0; l<loops; l++)
|
||||||
{
|
{
|
||||||
clock_gettime(CLOCK_REALTIME, &time_start);
|
begin();
|
||||||
COPY (&m, x, &inc_x, y, &inc_y );
|
COPY (&m, x, &inc_x, y, &inc_y );
|
||||||
clock_gettime(CLOCK_REALTIME, &time_end);
|
end();
|
||||||
|
timeg += getsec();
|
||||||
nanos = time_end.tv_nsec - time_start.tv_nsec;
|
|
||||||
seconds = time_end.tv_sec - time_start.tv_sec;
|
|
||||||
|
|
||||||
time1 = seconds + nanos / 1.e9;
|
|
||||||
timeg += time1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef DOT
|
#undef DOT
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define DOT BLASFUNC(ddot)
|
#define DOT BLASFUNC(ddot)
|
||||||
#else
|
#else
|
||||||
#define DOT BLASFUNC(sdot)
|
#define DOT BLASFUNC(sdot)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -122,7 +49,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -169,15 +95,12 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
result = DOT (&m, x, &inc_x, y, &inc_y );
|
result = DOT (&m, x, &inc_x, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
timeg += getsec();
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -36,13 +36,7 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef GEEV
|
#undef GEEV
|
||||||
|
|
||||||
|
@ -74,71 +68,6 @@ extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a,
|
||||||
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info );
|
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
|
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
|
||||||
|
@ -154,7 +83,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -223,7 +151,7 @@ int main(int argc, char *argv[]){
|
||||||
for(m = from; m <= to; m += step){
|
for(m = from; m <= to; m += step){
|
||||||
|
|
||||||
fprintf(stderr, " %6d : ", (int)m);
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
lwork = -1;
|
lwork = -1;
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
|
@ -239,14 +167,14 @@ int main(int argc, char *argv[]){
|
||||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info) {
|
if (info) {
|
||||||
fprintf(stderr, "failed to compute eigenvalues .. %d\n", info);
|
fprintf(stderr, "failed to compute eigenvalues .. %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops : %10.2f Sec : %d\n",
|
" %10.2f MFlops : %10.2f Sec : %d\n",
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef GEMM
|
#undef GEMM
|
||||||
|
|
||||||
|
@ -55,71 +49,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
IFLOAT *a, *b;
|
IFLOAT *a, *b;
|
||||||
|
@ -139,7 +68,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1, timeg;
|
double time1, timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -228,14 +156,14 @@ int main(int argc, char *argv[]){
|
||||||
ldc = m;
|
ldc = m;
|
||||||
|
|
||||||
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
|
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
for (j=0; j<loops; j++) {
|
for (j=0; j<loops; j++) {
|
||||||
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
|
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg = time1/loops;
|
timeg = time1/loops;
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef GEMM
|
#undef GEMM
|
||||||
|
|
||||||
|
@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b, *c;
|
FLOAT *a, *b, *c;
|
||||||
|
@ -133,7 +62,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -187,16 +115,12 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
timeg += getsec();
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef GEMV
|
#undef GEMV
|
||||||
|
@ -52,72 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -137,7 +66,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -211,10 +139,10 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -248,10 +176,10 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef GER
|
#undef GER
|
||||||
|
|
||||||
|
@ -49,72 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -131,7 +59,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -198,16 +125,13 @@ int main(int argc, char *argv[]){
|
||||||
for (l=0; l<loops; l++)
|
for (l=0; l<loops; l++)
|
||||||
{
|
{
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m);
|
GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
|
timeg += getsec();
|
||||||
}
|
}
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
|
@ -36,12 +36,7 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
double fabs(double);
|
double fabs(double);
|
||||||
|
|
||||||
|
@ -66,71 +61,6 @@ double fabs(double);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b;
|
FLOAT *a, *b;
|
||||||
|
@ -142,7 +72,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -194,22 +123,18 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
GESV (&m, &m, a, &m, ipiv, b, &m, &info);
|
GESV (&m, &m, a, &m, ipiv, b, &m, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
|
|
||||||
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"%10.2f MFlops %10.6f s\n",
|
"%10.2f MFlops %10.6f s\n",
|
||||||
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
|
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -36,12 +36,7 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#undef GETRF
|
#undef GETRF
|
||||||
#undef GETRI
|
#undef GETRI
|
||||||
|
@ -72,71 +67,6 @@
|
||||||
|
|
||||||
extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info);
|
extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info);
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a,*work;
|
FLOAT *a,*work;
|
||||||
|
@ -148,7 +78,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -205,21 +134,21 @@ int main(int argc, char *argv[]){
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
lwork = -1;
|
lwork = -1;
|
||||||
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
|
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
|
||||||
|
|
||||||
lwork = (blasint)wkopt[0];
|
lwork = (blasint)wkopt[0];
|
||||||
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
|
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info) {
|
if (info) {
|
||||||
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
|
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops : %10.2f Sec : %d\n",
|
" %10.2f MFlops : %10.2f Sec : %d\n",
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HBMV
|
#undef HBMV
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define HBMV BLASFUNC(zhbmv)
|
#define HBMV BLASFUNC(zhbmv)
|
||||||
#else
|
#else
|
||||||
#define HBMV BLASFUNC(chbmv)
|
#define HBMV BLASFUNC(chbmv)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz) {
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size) {
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -125,7 +52,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -186,15 +112,13 @@ int main(int argc, char *argv[]){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
timeg += getsec();
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HEMM
|
#undef HEMM
|
||||||
|
|
||||||
|
@ -41,72 +35,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define HEMM BLASFUNC(chemm)
|
#define HEMM BLASFUNC(chemm)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b, *c;
|
FLOAT *a, *b, *c;
|
||||||
|
@ -126,7 +54,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -170,13 +97,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HEMV
|
#undef HEMV
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define HEMV BLASFUNC(zhemv)
|
#define HEMV BLASFUNC(zhemv)
|
||||||
#else
|
#else
|
||||||
#define HEMV BLASFUNC(chemv)
|
#define HEMV BLASFUNC(chemv)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -124,7 +51,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -182,13 +108,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HER
|
#undef HER
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define HER BLASFUNC(zher)
|
#define HER BLASFUNC(zher)
|
||||||
#else
|
#else
|
||||||
#define HER BLASFUNC(cher)
|
#define HER BLASFUNC(cher)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x;
|
FLOAT *a, *x;
|
||||||
|
@ -126,8 +53,6 @@ int main(int argc, char *argv[]){
|
||||||
int from = 1;
|
int from = 1;
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -166,15 +91,13 @@ int main(int argc, char *argv[]){
|
||||||
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HER (&uplo, &m, alpha, x, &incx, a, &m );
|
HER (&uplo, &m, alpha, x, &incx, a, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HER2
|
#undef HER2
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define HER2 BLASFUNC(zher2)
|
#define HER2 BLASFUNC(zher2)
|
||||||
#else
|
#else
|
||||||
#define HER2 BLASFUNC(cher2)
|
#define HER2 BLASFUNC(cher2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -127,7 +54,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -169,16 +95,13 @@ int main(int argc, char *argv[]){
|
||||||
y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
|
|
||||||
HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m );
|
HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HER2K
|
#undef HER2K
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
|
@ -40,72 +34,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define HER2K BLASFUNC(cher2k)
|
#define HER2K BLASFUNC(cher2k)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b, *c;
|
FLOAT *a, *b, *c;
|
||||||
|
@ -125,7 +53,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -169,13 +96,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HERK
|
#undef HERK
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define HERK BLASFUNC(zherk)
|
#define HERK BLASFUNC(zherk)
|
||||||
#else
|
#else
|
||||||
#define HERK BLASFUNC(cherk)
|
#define HERK BLASFUNC(cherk)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *c;
|
FLOAT *a, *c;
|
||||||
|
@ -127,7 +54,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -167,18 +93,17 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HPMV
|
#undef HPMV
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define HPMV BLASFUNC(zhpmv)
|
#define HPMV BLASFUNC(zhpmv)
|
||||||
#else
|
#else
|
||||||
#define HPMV BLASFUNC(chpmv)
|
#define HPMV BLASFUNC(chpmv)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz) {
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size) {
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -124,7 +51,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -183,13 +109,13 @@ int main(int argc, char *argv[]){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef IAMAX
|
#undef IAMAX
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -127,7 +56,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -166,13 +94,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
IAMAX (&m, x, &inc_x);
|
IAMAX (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef IAMIN
|
#undef IAMIN
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -127,7 +56,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -166,13 +94,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
IAMIN (&m, x, &inc_x);
|
IAMIN (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef IMAX
|
#undef IMAX
|
||||||
|
|
||||||
|
@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
IMAX (&m, x, &inc_x);
|
IMAX (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef IMIN
|
#undef IMIN
|
||||||
|
|
||||||
|
@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
IMIN (&m, x, &inc_x);
|
IMIN (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -36,12 +36,7 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
double fabs(double);
|
double fabs(double);
|
||||||
|
|
||||||
|
@ -72,71 +67,6 @@ double fabs(double);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b;
|
FLOAT *a, *b;
|
||||||
|
@ -151,7 +81,6 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT maxerr;
|
FLOAT maxerr;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1, time2;
|
double time1, time2;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -198,31 +127,31 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
GETRF (&m, &m, a, &m, ipiv, &info);
|
GETRF (&m, &m, a, &m, ipiv, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info) {
|
if (info) {
|
||||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info);
|
GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info) {
|
if (info) {
|
||||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time2 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time2 = getsec();
|
||||||
|
|
||||||
maxerr = 0.;
|
maxerr = 0.;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef NAMAX
|
#undef NAMAX
|
||||||
|
|
||||||
|
@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
NAMAX (&m, x, &inc_x);
|
NAMAX (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef NAMIN
|
#undef NAMIN
|
||||||
|
|
||||||
|
@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
NAMIN (&m, x, &inc_x);
|
NAMIN (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef NRM2
|
#undef NRM2
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -127,7 +56,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -166,13 +94,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
NRM2 (&m, x, &inc_x);
|
NRM2 (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -36,12 +36,7 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
double fabs(double);
|
double fabs(double);
|
||||||
|
|
||||||
|
@ -86,37 +81,7 @@ double fabs(double);
|
||||||
// extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info);
|
// extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info);
|
||||||
// extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info);
|
// extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info);
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
|
@ -141,7 +106,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -217,18 +181,18 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
POTRF(uplo[uplos], &m, b, &m, &info);
|
POTRF(uplo[uplos], &m, b, &m, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info != 0) {
|
if (info != 0) {
|
||||||
fprintf(stderr, "Potrf info = %d\n", info);
|
fprintf(stderr, "Potrf info = %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
|
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
|
||||||
|
|
||||||
if ( btest == 'S' )
|
if ( btest == 'S' )
|
||||||
|
@ -240,17 +204,17 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info);
|
POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info != 0) {
|
if (info != 0) {
|
||||||
fprintf(stderr, "Potrs info = %d\n", info);
|
fprintf(stderr, "Potrs info = %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
|
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -258,18 +222,18 @@ int main(int argc, char *argv[]){
|
||||||
if ( btest == 'I' )
|
if ( btest == 'I' )
|
||||||
{
|
{
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
POTRI(uplo[uplos], &m, b, &m, &info);
|
POTRI(uplo[uplos], &m, b, &m, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info != 0) {
|
if (info != 0) {
|
||||||
fprintf(stderr, "Potri info = %d\n", info);
|
fprintf(stderr, "Potri info = %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
|
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#undef ROT
|
#undef ROT
|
||||||
|
|
||||||
|
@ -52,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -133,7 +63,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -179,13 +108,13 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for (l=0; l<loops; l++)
|
for (l=0; l<loops; l++)
|
||||||
{
|
{
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
ROT (&m, x, &inc_x, y, &inc_y, c, s);
|
ROT (&m, x, &inc_x, y, &inc_y, c, s);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,7 @@ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
||||||
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#undef ROTM
|
#undef ROTM
|
||||||
|
|
||||||
|
@ -40,72 +35,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ROTM BLASFUNC(srotm)
|
#define ROTM BLASFUNC(srotm)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz)
|
|
||||||
{
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv) {
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size)
|
|
||||||
{
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =
|
|
||||||
shmget(IPC_PRIVATE, (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT | 0600)) < 0) {
|
|
||||||
printf("Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1) {
|
|
||||||
printf("Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -122,7 +51,7 @@ int main(int argc, char *argv[])
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1, timeg;
|
double time1, timeg;
|
||||||
|
|
||||||
argc--;
|
argc--;
|
||||||
|
@ -188,14 +117,13 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
for (l = 0; l < loops; l++) {
|
for (l = 0; l < loops; l++) {
|
||||||
gettimeofday(&start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
ROTM(&m, x, &inc_x, y, &inc_y, param);
|
ROTM(&m, x, &inc_x, y, &inc_y, param);
|
||||||
|
|
||||||
gettimeofday(&stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) +
|
time1 = getsec();
|
||||||
(double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SCAL
|
#undef SCAL
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -128,7 +57,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -174,13 +102,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SCAL (&m, alpha, x, &inc_x);
|
SCAL (&m, alpha, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,17 +25,10 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SPMV
|
#undef SPMV
|
||||||
|
|
||||||
|
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
|
@ -54,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -135,7 +63,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -193,13 +120,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SPR
|
#undef SPR
|
||||||
|
|
||||||
|
@ -41,73 +35,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SPR BLASFUNC(sspr)
|
#define SPR BLASFUNC(sspr)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a,*c;
|
FLOAT *a,*c;
|
||||||
|
@ -129,7 +56,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -173,13 +99,13 @@ int main(int argc, char *argv[]){
|
||||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SPR (&uplo, &m, alpha, c, &inc_x, a);
|
SPR (&uplo, &m, alpha, c, &inc_x, a);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SPR2
|
#undef SPR2
|
||||||
|
@ -42,72 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a,*b,*c;
|
FLOAT *a,*b,*c;
|
||||||
|
@ -129,7 +58,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -182,13 +110,13 @@ int main(int argc, char *argv[]){
|
||||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a);
|
SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SWAP
|
#undef SWAP
|
||||||
|
@ -49,71 +44,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -128,7 +58,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -175,13 +104,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SWAP (&m, x, &inc_x, y, &inc_y );
|
SWAP (&m, x, &inc_x, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SYMM
|
#undef SYMM
|
||||||
|
|
||||||
|
@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b, *c;
|
FLOAT *a, *b, *c;
|
||||||
|
@ -137,7 +66,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -181,13 +109,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SYMV
|
#undef SYMV
|
||||||
|
|
||||||
|
@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -134,7 +63,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -192,13 +120,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SYMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
SYMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SYR
|
#undef SYR
|
||||||
|
@ -42,72 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x,*a;
|
FLOAT *x,*a;
|
||||||
|
@ -124,7 +53,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -165,13 +93,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SYR (&uplo, &m, alpha, x, &inc_x, a, &m );
|
SYR (&uplo, &m, alpha, x, &inc_x, a, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SYR2
|
#undef SYR2
|
||||||
|
|
||||||
|
@ -42,72 +36,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SYR2 BLASFUNC(ssyr2)
|
#define SYR2 BLASFUNC(ssyr2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y, *a;
|
FLOAT *x, *y, *a;
|
||||||
|
@ -125,7 +53,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -174,13 +101,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m );
|
SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SYR2K
|
#undef SYR2K
|
||||||
|
@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b, *c;
|
FLOAT *a, *b, *c;
|
||||||
|
@ -137,7 +67,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -181,13 +110,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SYRK
|
#undef SYRK
|
||||||
|
|
||||||
|
@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *c;
|
FLOAT *a, *c;
|
||||||
|
@ -137,7 +66,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -177,13 +105,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#undef TPMV
|
#undef TPMV
|
||||||
|
|
||||||
|
@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size)
|
|
||||||
{
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1) {
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -112,7 +73,6 @@ int main(int argc, char *argv[])
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
|
||||||
double time1, timeg;
|
double time1, timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -153,11 +113,11 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
for (l = 0; l < loops; l++) {
|
for (l = 0; l < loops; l++) {
|
||||||
clock_gettime(CLOCK_REALTIME, &start);
|
begin();
|
||||||
TPMV (&uplo, &trans, &diag, &n, a, x, &inc_x);
|
TPMV (&uplo, &trans, &diag, &n, a, x, &inc_x);
|
||||||
clock_gettime(CLOCK_REALTIME, &stop);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
|
time1 = getsec();
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#undef TPSV
|
#undef TPSV
|
||||||
|
|
||||||
|
@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size)
|
|
||||||
{
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1) {
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -112,7 +73,6 @@ int main(int argc, char *argv[])
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
|
||||||
double time1, timeg;
|
double time1, timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -153,11 +113,11 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
for (l = 0; l < loops; l++) {
|
for (l = 0; l < loops; l++) {
|
||||||
clock_gettime(CLOCK_REALTIME, &start);
|
begin();
|
||||||
TPSV (&uplo, &trans, &diag, &n, a, x, &inc_x);
|
TPSV (&uplo, &trans, &diag, &n, a, x, &inc_x);
|
||||||
clock_gettime(CLOCK_REALTIME, &stop);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
|
time1 = getsec();
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef TRMM
|
#undef TRMM
|
||||||
|
@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b;
|
FLOAT *a, *b;
|
||||||
|
@ -141,7 +71,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -180,13 +109,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops %10.6f sec\n",
|
" %10.2f MFlops %10.6f sec\n",
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#undef TRMV
|
#undef TRMV
|
||||||
|
|
||||||
|
@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size)
|
|
||||||
{
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1) {
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -112,7 +73,6 @@ int main(int argc, char *argv[])
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
|
||||||
double time1, timeg;
|
double time1, timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -153,11 +113,11 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
for (l = 0; l < loops; l++) {
|
for (l = 0; l < loops; l++) {
|
||||||
clock_gettime(CLOCK_REALTIME, &start);
|
begin();
|
||||||
TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x);
|
TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x);
|
||||||
clock_gettime(CLOCK_REALTIME, &stop);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
|
time1 = getsec();
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef TRSM
|
#undef TRSM
|
||||||
|
@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b;
|
FLOAT *a, *b;
|
||||||
|
@ -151,7 +81,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -196,13 +125,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,14 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include <time.h>
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef GEMV
|
#undef GEMV
|
||||||
#undef TRSV
|
#undef TRSV
|
||||||
|
@ -55,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x;
|
FLOAT *a, *x;
|
||||||
|
@ -133,7 +61,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timespec time_start, time_end;
|
|
||||||
time_t seconds = 0;
|
time_t seconds = 0;
|
||||||
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
@ -189,19 +116,13 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(l =0;l< loops;l++){
|
for(l =0;l< loops;l++){
|
||||||
|
|
||||||
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_start);
|
begin();
|
||||||
|
|
||||||
TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x);
|
TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x);
|
||||||
|
end();
|
||||||
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_end);
|
time1 = getsec();
|
||||||
nanos = time_end.tv_nsec - time_start.tv_nsec;
|
|
||||||
seconds = time_end.tv_sec - time_start.tv_sec;
|
|
||||||
|
|
||||||
time1 = seconds + nanos /1.e9;
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
long long muls = n*(n+1)/2.0;
|
long long muls = n*(n+1)/2.0;
|
||||||
long long adds = (n - 1.0)*n/2.0;
|
long long adds = (n - 1.0)*n/2.0;
|
||||||
|
|
|
@ -25,90 +25,18 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#define RETURN_BY_STACK 1
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
#define RETURN_BY_STACK 1
|
||||||
|
|
||||||
#undef DOT
|
#undef DOT
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define DOT BLASFUNC(zdotu)
|
#define DOT BLASFUNC(zdotu)
|
||||||
#else
|
#else
|
||||||
#define DOT BLASFUNC(cdotu)
|
#define DOT BLASFUNC(cdotu)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -123,7 +51,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -170,13 +97,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
DOT (&result, &m, x, &inc_x, y, &inc_y );
|
DOT (&result, &m, x, &inc_x, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef DOT
|
#undef DOT
|
||||||
|
|
||||||
|
@ -42,72 +36,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define DOT BLASFUNC(cdotu)
|
#define DOT BLASFUNC(cdotu)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -122,7 +50,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -169,15 +96,15 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
#ifdef RETURN_BY_STACK
|
#ifdef RETURN_BY_STACK
|
||||||
DOT (&result , &m, x, &inc_x, y, &inc_y );
|
DOT (&result , &m, x, &inc_x, y, &inc_y );
|
||||||
#else
|
#else
|
||||||
result = DOT (&m, x, &inc_x, y, &inc_y );
|
result = DOT (&m, x, &inc_x, y, &inc_y );
|
||||||
#endif
|
#endif
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
26
c_check
26
c_check
|
@ -1,12 +1,13 @@
|
||||||
#!/usr/bin/perl
|
#!/usr/bin/env perl
|
||||||
|
|
||||||
#use File::Basename;
|
#use File::Basename;
|
||||||
# use File::Temp qw(tempfile);
|
# use File::Temp qw(tempfile);
|
||||||
|
|
||||||
# Checking cross compile
|
# Checking cross compile
|
||||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
||||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
$hostarch = `uname -m | sed -e s/i.86/x86/`;
|
||||||
$hostarch = `uname -p` if ($hostos eq "AIX");
|
$hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS");
|
||||||
|
chop($hostarch);
|
||||||
$hostarch = "x86_64" if ($hostarch eq "amd64");
|
$hostarch = "x86_64" if ($hostarch eq "amd64");
|
||||||
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/);
|
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/);
|
||||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
||||||
|
@ -92,6 +93,7 @@ $architecture = ia64 if ($data =~ /ARCH_IA64/);
|
||||||
$architecture = arm if ($data =~ /ARCH_ARM/);
|
$architecture = arm if ($data =~ /ARCH_ARM/);
|
||||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
|
||||||
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
|
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
|
||||||
|
$architecture = riscv64 if ($data =~ /ARCH_RISCV64/);
|
||||||
|
|
||||||
$defined = 0;
|
$defined = 0;
|
||||||
|
|
||||||
|
@ -136,6 +138,11 @@ if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) {
|
||||||
$binary =32;
|
$binary =32;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($architecture eq "riscv64") {
|
||||||
|
$defined = 1;
|
||||||
|
$binary = 64;
|
||||||
|
}
|
||||||
|
|
||||||
if ($compiler eq "PGI") {
|
if ($compiler eq "PGI") {
|
||||||
$compiler_name .= " -tp p7" if ($binary eq "32");
|
$compiler_name .= " -tp p7" if ($binary eq "32");
|
||||||
$compiler_name .= " -tp p7-64" if ($binary eq "64");
|
$compiler_name .= " -tp p7-64" if ($binary eq "64");
|
||||||
|
@ -192,7 +199,7 @@ if (($architecture eq "mips") || ($architecture eq "mips64")) {
|
||||||
} else {
|
} else {
|
||||||
$tmpf = new File::Temp( SUFFIX => '.c' , UNLINK => 1 );
|
$tmpf = new File::Temp( SUFFIX => '.c' , UNLINK => 1 );
|
||||||
$code = '"addvi.b $w0, $w1, 1"';
|
$code = '"addvi.b $w0, $w1, 1"';
|
||||||
$msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs";
|
$msa_flags = "-mmsa -mfp64 -mload-store-pairs";
|
||||||
print $tmpf "#include <msa.h>\n\n";
|
print $tmpf "#include <msa.h>\n\n";
|
||||||
print $tmpf "void main(void){ __asm__ volatile($code); }\n";
|
print $tmpf "void main(void){ __asm__ volatile($code); }\n";
|
||||||
|
|
||||||
|
@ -270,6 +277,15 @@ if ($data =~ /HAVE_C11/) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($compiler eq "GCC" &&( ($architecture eq "x86") || ($architecture eq "x86_64"))) {
|
||||||
|
$no_avx2 = 0;
|
||||||
|
$oldgcc = 0;
|
||||||
|
$data = `$compiler_name -dumpversion`;
|
||||||
|
if ($data <= 4.6) {
|
||||||
|
$no_avx2 = 1;
|
||||||
|
$oldgcc = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
$data = `$compiler_name $flags -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
|
$data = `$compiler_name $flags -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;
|
||||||
|
|
||||||
|
@ -362,6 +378,8 @@ print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
|
||||||
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
|
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
|
||||||
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
|
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
|
||||||
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1;
|
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1;
|
||||||
|
print MAKEFILE "NO_AVX2=1\n" if $no_avx2 eq 1;
|
||||||
|
print MAKEFILE "OLDGCC=1\n" if $oldgcc eq 1;
|
||||||
|
|
||||||
$os =~ tr/[a-z]/[A-Z]/;
|
$os =~ tr/[a-z]/[A-Z]/;
|
||||||
$architecture =~ tr/[a-z]/[A-Z]/;
|
$architecture =~ tr/[a-z]/[A-Z]/;
|
||||||
|
|
6
cblas.h
6
cblas.h
|
@ -125,9 +125,14 @@ void cblas_zswap(OPENBLAS_CONST blasint n, void *x, OPENBLAS_CONST blasint incx,
|
||||||
|
|
||||||
void cblas_srot(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s);
|
void cblas_srot(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s);
|
||||||
void cblas_drot(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s);
|
void cblas_drot(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s);
|
||||||
|
void cblas_csrot(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float c, OPENBLAS_CONST float s);
|
||||||
|
void cblas_zdrot(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double c, OPENBLAS_CONST double s);
|
||||||
|
|
||||||
void cblas_srotg(float *a, float *b, float *c, float *s);
|
void cblas_srotg(float *a, float *b, float *c, float *s);
|
||||||
void cblas_drotg(double *a, double *b, double *c, double *s);
|
void cblas_drotg(double *a, double *b, double *c, double *s);
|
||||||
|
void cblas_crotg(void *a, void *b, float *c, void *s);
|
||||||
|
void cblas_zrotg(void *a, void *b, double *c, void *s);
|
||||||
|
|
||||||
|
|
||||||
void cblas_srotm(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float *P);
|
void cblas_srotm(OPENBLAS_CONST blasint N, float *X, OPENBLAS_CONST blasint incX, float *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST float *P);
|
||||||
void cblas_drotm(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double *P);
|
void cblas_drotm(OPENBLAS_CONST blasint N, double *X, OPENBLAS_CONST blasint incX, double *Y, OPENBLAS_CONST blasint incY, OPENBLAS_CONST double *P);
|
||||||
|
@ -393,6 +398,7 @@ void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPE
|
||||||
void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout);
|
void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout);
|
||||||
/* dot production of BFLOAT16 input arrays, and output as float */
|
/* dot production of BFLOAT16 input arrays, and output as float */
|
||||||
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
|
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
|
||||||
|
void cblas_sbgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,10 +45,14 @@ endif ()
|
||||||
if (DYNAMIC_ARCH)
|
if (DYNAMIC_ARCH)
|
||||||
if (ARM64)
|
if (ARM64)
|
||||||
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
|
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
|
||||||
|
if (DYNAMIC_LIST)
|
||||||
|
set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST})
|
||||||
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (POWER)
|
if (POWER)
|
||||||
set(DYNAMIC_CORE POWER6 POWER8 POWER9 POWER10)
|
set(DYNAMIC_CORE POWER6 POWER8 POWER9 POWER10)
|
||||||
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DHAVE_P10_SUPPORT")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (X86)
|
if (X86)
|
||||||
|
|
|
@ -96,7 +96,7 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CORE} STREQUAL "SKYLAKEX")
|
if (${CORE} STREQUAL SKYLAKEX)
|
||||||
if (NOT DYNAMIC_ARCH)
|
if (NOT DYNAMIC_ARCH)
|
||||||
if (NOT NO_AVX512)
|
if (NOT NO_AVX512)
|
||||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
|
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
|
||||||
|
@ -104,7 +104,7 @@ if (${CORE} STREQUAL "SKYLAKEX")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CORE} STREQUAL "COOPERLAKE")
|
if (${CORE} STREQUAL COOPERLAKE)
|
||||||
if (NOT DYNAMIC_ARCH)
|
if (NOT DYNAMIC_ARCH)
|
||||||
if (NOT NO_AVX512)
|
if (NOT NO_AVX512)
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||||
|
@ -124,10 +124,22 @@ if (NOT DYNAMIC_ARCH)
|
||||||
if (HAVE_AVX)
|
if (HAVE_AVX)
|
||||||
set (CCOMMON_OPT "${CCOMMON_OPT} -mavx")
|
set (CCOMMON_OPT "${CCOMMON_OPT} -mavx")
|
||||||
endif ()
|
endif ()
|
||||||
|
if (HAVE_FMA3)
|
||||||
|
set (CCOMMON_OPT "${CCOMMON_OPT} -mfma")
|
||||||
|
endif ()
|
||||||
|
if (HAVE_SSE)
|
||||||
|
set (CCOMMON_OPT "${CCOMMON_OPT} -msse")
|
||||||
|
endif ()
|
||||||
|
if (HAVE_SSE2)
|
||||||
|
set (CCOMMON_OPT "${CCOMMON_OPT} -msse2")
|
||||||
|
endif ()
|
||||||
if (HAVE_SSE3)
|
if (HAVE_SSE3)
|
||||||
set (CCOMMON_OPT "${CCOMMON_OPT} -msse3")
|
set (CCOMMON_OPT "${CCOMMON_OPT} -msse3")
|
||||||
endif ()
|
endif ()
|
||||||
if (HAVE_SSSE3)
|
if (HAVE_SSSE3)
|
||||||
set (CCOMMON_OPT "${CCOMMON_OPT} -mssse3")
|
set (CCOMMON_OPT "${CCOMMON_OPT} -mssse3")
|
||||||
endif ()
|
endif ()
|
||||||
|
if (HAVE_SSE4_1)
|
||||||
|
set (CCOMMON_OPT "${CCOMMON_OPT} -msse4.1")
|
||||||
|
endif ()
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -184,8 +184,8 @@ macro(SetDefaultL2)
|
||||||
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
|
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
|
||||||
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
|
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
|
||||||
if (BUILD_BFLOAT16)
|
if (BUILD_BFLOAT16)
|
||||||
set(SBGEMVNKERNEL ../arm/gemv_n.c)
|
set(SBGEMVNKERNEL ../x86_64/sbgemv_n.c)
|
||||||
set(SBGEMVTKERNEL ../arm/gemv_t.c)
|
set(SBGEMVTKERNEL ../x86_64/sbgemv_t.c)
|
||||||
set(SHGERKERNEL ../generic/ger.c)
|
set(SHGERKERNEL ../generic/ger.c)
|
||||||
endif ()
|
endif ()
|
||||||
endmacro ()
|
endmacro ()
|
||||||
|
|
|
@ -2499,6 +2499,5 @@ foreach (Utils_FILE ${Utils_SRC})
|
||||||
endforeach ()
|
endforeach ()
|
||||||
|
|
||||||
set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/LAPACKE/include")
|
set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/LAPACKE/include")
|
||||||
configure_file("${lapacke_include_dir}/lapacke_mangling_with_flags.h.in" "${lapacke_include_dir}/lapacke_mangling.h" COPYONLY)
|
|
||||||
include_directories(${lapacke_include_dir})
|
include_directories(${lapacke_include_dir})
|
||||||
set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}")
|
set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}")
|
||||||
|
|
|
@ -84,6 +84,14 @@ if (X86)
|
||||||
set(NO_EXPRECISION 1)
|
set(NO_EXPRECISION 1)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (DYNAMIC_ARCH)
|
||||||
|
if (TARGET)
|
||||||
|
if (${TARGET} STREQUAL "GENERIC")
|
||||||
|
set(NO_EXPRECISION 1)
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
|
||||||
if (UTEST_CHECK)
|
if (UTEST_CHECK)
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
|
||||||
set(SANITY_CHECK 1)
|
set(SANITY_CHECK 1)
|
||||||
|
|
|
@ -416,6 +416,29 @@ endif ()
|
||||||
set(ZGEMM_UNROLL_M 4)
|
set(ZGEMM_UNROLL_M 4)
|
||||||
set(ZGEMM_UNROLL_N 4)
|
set(ZGEMM_UNROLL_N 4)
|
||||||
set(SYMV_P 16)
|
set(SYMV_P 16)
|
||||||
|
elseif ("${TCORE}" STREQUAL "VORTEX")
|
||||||
|
file(APPEND ${TARGET_CONF_TEMP}
|
||||||
|
"#define ARMV8\n"
|
||||||
|
"#define L1_CODE_SIZE\t32768\n"
|
||||||
|
"#define L1_CODE_LINESIZE\t64\n"
|
||||||
|
"#define L1_CODE_ASSOCIATIVE\t4\n"
|
||||||
|
"#define L1_DATA_SIZE\t32768\n"
|
||||||
|
"#define L1_DATA_LINESIZE\t64\n"
|
||||||
|
"#define L1_DATA_ASSOCIATIVE\t4\n"
|
||||||
|
"#define L2_SIZE\t5262144\n"
|
||||||
|
"#define L2_LINESIZE\t64\n"
|
||||||
|
"#define L2_ASSOCIATIVE\t8\n"
|
||||||
|
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||||
|
"#define DTB_SIZE\t4096\n")
|
||||||
|
set(SGEMM_UNROLL_M 16)
|
||||||
|
set(SGEMM_UNROLL_N 4)
|
||||||
|
set(DGEMM_UNROLL_M 8)
|
||||||
|
set(DGEMM_UNROLL_N 4)
|
||||||
|
set(CGEMM_UNROLL_M 8)
|
||||||
|
set(CGEMM_UNROLL_N 4)
|
||||||
|
set(ZGEMM_UNROLL_M 4)
|
||||||
|
set(ZGEMM_UNROLL_N 4)
|
||||||
|
set(SYMV_P 16)
|
||||||
elseif ("${TCORE}" STREQUAL "POWER6")
|
elseif ("${TCORE}" STREQUAL "POWER6")
|
||||||
file(APPEND ${TARGET_CONF_TEMP}
|
file(APPEND ${TARGET_CONF_TEMP}
|
||||||
"#define L1_DATA_SIZE 32768\n"
|
"#define L1_DATA_SIZE 32768\n"
|
||||||
|
@ -533,6 +556,21 @@ else(NOT CMAKE_CROSSCOMPILING)
|
||||||
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
|
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
unset (HAVE_AVX2)
|
||||||
|
unset (HAVE_AVX)
|
||||||
|
unset (HAVE_FMA3)
|
||||||
|
unset (HAVE_MMX)
|
||||||
|
unset (HAVE_SSE)
|
||||||
|
unset (HAVE_SSE2)
|
||||||
|
unset (HAVE_SSE3)
|
||||||
|
unset (HAVE_SSSE3)
|
||||||
|
unset (HAVE_SSE4A)
|
||||||
|
unset (HAVE_SSE4_1)
|
||||||
|
unset (HAVE_SSE4_2)
|
||||||
|
unset (HAVE_NEON)
|
||||||
|
unset (HAVE_VFP)
|
||||||
|
unset (HAVE_VFPV3)
|
||||||
|
unset (HAVE_VFPV4)
|
||||||
message(STATUS "Running getarch")
|
message(STATUS "Running getarch")
|
||||||
|
|
||||||
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
||||||
|
|
|
@ -44,38 +44,9 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (DEFINED TARGET)
|
|
||||||
if (${TARGET} STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
|
|
||||||
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
|
||||||
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
|
|
||||||
else()
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
|
||||||
endif()
|
|
||||||
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
|
||||||
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
|
||||||
# endif()
|
|
||||||
endif()
|
|
||||||
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
|
||||||
endif()
|
|
||||||
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
|
|
||||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
|
||||||
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
|
||||||
endif()
|
|
||||||
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
if (DEFINED HAVE_SSE3)
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (DEFINED TARGET)
|
if (DEFINED TARGET)
|
||||||
|
message(STATUS "-- -- -- -- -- -- -- -- -- -- -- -- --")
|
||||||
message(STATUS "Targeting the ${TARGET} architecture.")
|
message(STATUS "Targeting the ${TARGET} architecture.")
|
||||||
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
|
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
|
||||||
endif ()
|
endif ()
|
||||||
|
@ -175,6 +146,67 @@ else()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
|
||||||
|
if (DEFINED TARGET)
|
||||||
|
if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512)
|
||||||
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||||
|
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||||
|
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 10.09)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
|
||||||
|
else()
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||||
|
endif()
|
||||||
|
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang")
|
||||||
|
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 8.99)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
|
||||||
|
else()
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||||
|
endif()
|
||||||
|
if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2)
|
||||||
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||||
|
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||||
|
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||||
|
endif()
|
||||||
|
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_AVX)
|
||||||
|
if (NOT NO_AVX)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_AVX2)
|
||||||
|
if (NOT NO_AVX2)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_FMA3)
|
||||||
|
if (NOT NO_AVX2)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mfma")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_SSE)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_SSE2)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_SSE3)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_SSSE3)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_SSE4_1)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
if (DEFINED BINARY)
|
if (DEFINED BINARY)
|
||||||
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
||||||
endif ()
|
endif ()
|
||||||
|
@ -205,6 +237,11 @@ if (BINARY64)
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if(EMBEDDED)
|
||||||
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DOS_EMBEDDED")
|
||||||
|
set(CCOMMON_OPT "${CCOMMON_OPT} -mthumb -mcpu=cortex-m4 -mfloat-abi=hard -mfpu=fpv4-sp-d16")
|
||||||
|
endif()
|
||||||
|
|
||||||
if (NEED_PIC)
|
if (NEED_PIC)
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "IBM")
|
if (${CMAKE_C_COMPILER} STREQUAL "IBM")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large")
|
||||||
|
@ -262,6 +299,10 @@ if (NO_AVX2)
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (NO_AVX512)
|
||||||
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512")
|
||||||
|
endif ()
|
||||||
|
|
||||||
if (USE_THREAD)
|
if (USE_THREAD)
|
||||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||||
# NO_AFFINITY = 1
|
# NO_AFFINITY = 1
|
||||||
|
|
|
@ -74,6 +74,9 @@ macro(ParseMakefileVars MAKEFILE_IN)
|
||||||
string(REGEX MATCH "ifneq \\(\\$\\(([_A-Z]+)\\),[ \t]*([0-9_A-Z]+)\\)" line_match "${makefile_line}")
|
string(REGEX MATCH "ifneq \\(\\$\\(([_A-Z]+)\\),[ \t]*([0-9_A-Z]+)\\)" line_match "${makefile_line}")
|
||||||
if (NOT "${line_match}" STREQUAL "")
|
if (NOT "${line_match}" STREQUAL "")
|
||||||
# message(STATUS "IFNEQ: ${line_match} first: ${CMAKE_MATCH_1} second: ${CMAKE_MATCH_2}")
|
# message(STATUS "IFNEQ: ${line_match} first: ${CMAKE_MATCH_1} second: ${CMAKE_MATCH_2}")
|
||||||
|
if ( ${CMAKE_MATCH_1} STREQUAL C_COMPILER)
|
||||||
|
set (CMAKE_MATCH_1 CMAKE_C_COMPILER)
|
||||||
|
endif ()
|
||||||
if (NOT ( ${${CMAKE_MATCH_1}} STREQUAL ${CMAKE_MATCH_2}))
|
if (NOT ( ${${CMAKE_MATCH_1}} STREQUAL ${CMAKE_MATCH_2}))
|
||||||
# message (STATUS "condition is true")
|
# message (STATUS "condition is true")
|
||||||
set (IfElse 1)
|
set (IfElse 1)
|
||||||
|
|
18
common.h
18
common.h
|
@ -122,7 +122,7 @@ extern "C" {
|
||||||
#define ATOM GOTO_ATOM
|
#define ATOM GOTO_ATOM
|
||||||
#undef GOTO_ATOM
|
#undef GOTO_ATOM
|
||||||
#endif
|
#endif
|
||||||
#else
|
#elif !defined(OS_EMBEDDED)
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
#ifndef NO_SYSV_IPC
|
#ifndef NO_SYSV_IPC
|
||||||
#include <sys/shm.h>
|
#include <sys/shm.h>
|
||||||
|
@ -134,6 +134,9 @@ extern "C" {
|
||||||
#if defined(SMP) || defined(USE_LOCKING)
|
#if defined(SMP) || defined(USE_LOCKING)
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
#endif
|
#endif
|
||||||
|
#else
|
||||||
|
#include <time.h>
|
||||||
|
#include <math.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(OS_SUNOS)
|
#if defined(OS_SUNOS)
|
||||||
|
@ -437,6 +440,11 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
||||||
#include "common_mips.h"
|
#include "common_mips.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef ARCH_RISCV64
|
||||||
|
#include "common_riscv64.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef ARCH_MIPS64
|
#ifdef ARCH_MIPS64
|
||||||
#include "common_mips64.h"
|
#include "common_mips64.h"
|
||||||
#endif
|
#endif
|
||||||
|
@ -483,10 +491,12 @@ static inline unsigned long long rpcc(void){
|
||||||
struct timespec ts;
|
struct timespec ts;
|
||||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||||
return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
|
return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec;
|
||||||
#else
|
#elif !defined(OS_EMBEDDED)
|
||||||
struct timeval tv;
|
struct timeval tv;
|
||||||
gettimeofday(&tv,NULL);
|
gettimeofday(&tv,NULL);
|
||||||
return (unsigned long long)tv.tv_sec * 1000000000ull + tv.tv_usec * 1000;
|
return (unsigned long long)tv.tv_sec * 1000000000ull + tv.tv_usec * 1000;
|
||||||
|
#else
|
||||||
|
return 0;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
#define RPCC_DEFINED
|
#define RPCC_DEFINED
|
||||||
|
@ -516,6 +526,10 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
#include "common_linux.h"
|
#include "common_linux.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef OS_EMBEDDED
|
||||||
|
#define DTB_DEFAULT_ENTRIES 64
|
||||||
|
#endif
|
||||||
|
|
||||||
#define MMAP_ACCESS (PROT_READ | PROT_WRITE)
|
#define MMAP_ACCESS (PROT_READ | PROT_WRITE)
|
||||||
|
|
||||||
#ifdef __NetBSD__
|
#ifdef __NetBSD__
|
||||||
|
|
|
@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define INLINE inline
|
#define INLINE inline
|
||||||
|
|
||||||
#ifdef F_INTERFACE_FLANG
|
#if defined( F_INTERFACE_FLANG) || defined(F_INTERFACE_PGI)
|
||||||
#define RETURN_BY_STACK
|
#define RETURN_BY_STACK
|
||||||
#else
|
#else
|
||||||
#define RETURN_BY_COMPLEX
|
#define RETURN_BY_COMPLEX
|
||||||
|
@ -142,14 +142,8 @@ REALNAME:
|
||||||
#define HUGE_PAGESIZE ( 4 << 20)
|
#define HUGE_PAGESIZE ( 4 << 20)
|
||||||
|
|
||||||
#ifndef BUFFERSIZE
|
#ifndef BUFFERSIZE
|
||||||
#if defined(CORTEXA57)
|
|
||||||
#define BUFFER_SIZE (20 << 20)
|
|
||||||
#elif defined(TSV110) || defined(EMAG8180)
|
|
||||||
#define BUFFER_SIZE (32 << 20)
|
#define BUFFER_SIZE (32 << 20)
|
||||||
#else
|
#else
|
||||||
#define BUFFER_SIZE (16 << 20)
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#define BUFFER_SIZE (32 << BUFFERSIZE)
|
#define BUFFER_SIZE (32 << BUFFERSIZE)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -250,6 +250,8 @@ void BLASFUNC(xgeru)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||||
void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||||
xdouble *, blasint *, xdouble *, blasint *);
|
xdouble *, blasint *, xdouble *, blasint *);
|
||||||
|
|
||||||
|
void BLASFUNC(sbgemv)(char *, blasint *, blasint *, float *, bfloat16 *, blasint *,
|
||||||
|
bfloat16 *, blasint *, float *, float *, blasint *);
|
||||||
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||||
float *, blasint *, float *, float *, blasint *);
|
float *, blasint *, float *, float *, blasint *);
|
||||||
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||||
|
|
|
@ -44,6 +44,10 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
int sbgemv_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
|
||||||
|
int sbgemv_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
|
||||||
|
int sbgemv_thread_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
|
||||||
|
int sbgemv_thread_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
|
||||||
int sger_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
int sger_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||||
int dger_k (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
int dger_k (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||||
int qger_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
int qger_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
||||||
|
|
|
@ -75,18 +75,10 @@ static inline int my_mbind(void *addr, unsigned long len, int mode,
|
||||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
|
||||||
return 0;
|
return 0;
|
||||||
#else
|
#else
|
||||||
#if defined (LOONGSON3B)
|
|
||||||
#if defined (__64BIT__)
|
|
||||||
return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags);
|
|
||||||
#else
|
|
||||||
return 0; //NULL Implementation on Loongson 3B 32bit.
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
//Fixed randomly SEGFAULT when nodemask==NULL with above Linux 2.6.34
|
//Fixed randomly SEGFAULT when nodemask==NULL with above Linux 2.6.34
|
||||||
// unsigned long null_nodemask=0;
|
// unsigned long null_nodemask=0;
|
||||||
return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags);
|
return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
|
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {
|
||||||
|
|
|
@ -646,10 +646,12 @@
|
||||||
|
|
||||||
#elif defined(BFLOAT16)
|
#elif defined(BFLOAT16)
|
||||||
|
|
||||||
#define D_TO_BF16_K SBDTOBF16_K
|
#define D_TO_BF16_K SBDTOBF16_K
|
||||||
#define D_BF16_TO_K DBF16TOD_K
|
#define D_BF16_TO_K DBF16TOD_K
|
||||||
#define S_TO_BF16_K SBSTOBF16_K
|
#define S_TO_BF16_K SBSTOBF16_K
|
||||||
#define S_BF16_TO_K SBF16TOS_K
|
#define S_BF16_TO_K SBF16TOS_K
|
||||||
|
#define SBGEMV_N SBGEMV_N_K
|
||||||
|
#define SBGEMV_T SBGEMV_T_K
|
||||||
|
|
||||||
#define AMAX_K SAMAX_K
|
#define AMAX_K SAMAX_K
|
||||||
#define AMIN_K SAMIN_K
|
#define AMIN_K SAMIN_K
|
||||||
|
|
|
@ -229,12 +229,7 @@ REALNAME: ;\
|
||||||
|
|
||||||
#define BUFFER_SIZE ( 32 << 21)
|
#define BUFFER_SIZE ( 32 << 21)
|
||||||
|
|
||||||
#if defined(LOONGSON3A)
|
#if defined(LOONGSON3R3) || defined(LOONGSON3R4)
|
||||||
#define PAGESIZE (16UL << 10)
|
|
||||||
#define FIXED_PAGESIZE (16UL << 10)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(LOONGSON3B)
|
|
||||||
#define PAGESIZE (16UL << 10)
|
#define PAGESIZE (16UL << 10)
|
||||||
#define FIXED_PAGESIZE (16UL << 10)
|
#define FIXED_PAGESIZE (16UL << 10)
|
||||||
#endif
|
#endif
|
||||||
|
@ -250,7 +245,7 @@ REALNAME: ;\
|
||||||
#define MAP_ANONYMOUS MAP_ANON
|
#define MAP_ANONYMOUS MAP_ANON
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(LOONGSON3A) || defined(LOONGSON3B)
|
#if defined(LOONGSON3R3) || defined(LOONGSON3R4)
|
||||||
#define PREFETCHD_(x) ld $0, x
|
#define PREFETCHD_(x) ld $0, x
|
||||||
#define PREFETCHD(x) PREFETCHD_(x)
|
#define PREFETCHD(x) PREFETCHD_(x)
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -78,8 +78,8 @@ BLASLONG (*isbmin_k) (BLASLONG, float *, BLASLONG);
|
||||||
int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||||
int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||||
|
|
||||||
int (*sbgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
int (*sbgemv_n) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
|
||||||
int (*sbgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
int (*sbgemv_t) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
|
||||||
int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||||
|
|
||||||
int (*sbsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
int (*sbsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||||
|
|
|
@ -844,11 +844,15 @@ Lmcount$lazy_ptr:
|
||||||
#define BUFFER_SIZE ( 2 << 20)
|
#define BUFFER_SIZE ( 2 << 20)
|
||||||
#elif defined(PPC440FP2)
|
#elif defined(PPC440FP2)
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
#elif defined(POWER8) || defined(POWER9) || defined(POWER10)
|
#elif defined(POWER6) || defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||||
#define BUFFER_SIZE ( 64 << 20)
|
#define BUFFER_SIZE ( 64 << 22)
|
||||||
#else
|
#else
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef DYNAMIC_ARCH
|
||||||
|
#undef BUFFER_SIZE
|
||||||
|
#define BUFFER_SIZE (64 << 22)
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef PAGESIZE
|
#ifndef PAGESIZE
|
||||||
#define PAGESIZE ( 4 << 10)
|
#define PAGESIZE ( 4 << 10)
|
||||||
|
|
|
@ -0,0 +1,98 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written
|
||||||
|
permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
**********************************************************************************/
|
||||||
|
|
||||||
|
/*********************************************************************/
|
||||||
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* All rights reserved. */
|
||||||
|
/* */
|
||||||
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
/* without modification, are permitted provided that the following */
|
||||||
|
/* conditions are met: */
|
||||||
|
/* */
|
||||||
|
/* 1. Redistributions of source code must retain the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer. */
|
||||||
|
/* */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer in the documentation and/or other materials */
|
||||||
|
/* provided with the distribution. */
|
||||||
|
/* */
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||||
|
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||||
|
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||||
|
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||||
|
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||||
|
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||||
|
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||||
|
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||||
|
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||||
|
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/* */
|
||||||
|
/* The views and conclusions contained in the software and */
|
||||||
|
/* documentation are those of the authors and should not be */
|
||||||
|
/* interpreted as representing official policies, either expressed */
|
||||||
|
/* or implied, of The University of Texas at Austin. */
|
||||||
|
/*********************************************************************/
|
||||||
|
|
||||||
|
#ifndef COMMON_RISCV64
|
||||||
|
#define COMMON_RISCV64
|
||||||
|
|
||||||
|
#define MB __sync_synchronize()
|
||||||
|
#define WMB __sync_synchronize()
|
||||||
|
#define RMB __sync_synchronize()
|
||||||
|
|
||||||
|
#define INLINE inline
|
||||||
|
|
||||||
|
#ifndef ASSEMBLER
|
||||||
|
|
||||||
|
|
||||||
|
static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
|
return x / y;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#define BUFFER_SIZE ( 32 << 20)
|
||||||
|
#define SEEK_ADDRESS
|
||||||
|
|
||||||
|
#if defined(C910V)
|
||||||
|
#include <riscv-vector.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
|
@ -8,6 +8,8 @@
|
||||||
#define SBDTOBF16_K sbdtobf16_k
|
#define SBDTOBF16_K sbdtobf16_k
|
||||||
#define SBF16TOS_K sbf16tos_k
|
#define SBF16TOS_K sbf16tos_k
|
||||||
#define DBF16TOD_K dbf16tod_k
|
#define DBF16TOD_K dbf16tod_k
|
||||||
|
#define SBGEMV_N_K sbgemv_n
|
||||||
|
#define SBGEMV_T_K sbgemv_t
|
||||||
|
|
||||||
#define SBGEMM_ONCOPY sbgemm_oncopy
|
#define SBGEMM_ONCOPY sbgemm_oncopy
|
||||||
#define SBGEMM_OTCOPY sbgemm_otcopy
|
#define SBGEMM_OTCOPY sbgemm_otcopy
|
||||||
|
@ -29,6 +31,8 @@
|
||||||
#define SBDTOBF16_K gotoblas -> sbdtobf16_k
|
#define SBDTOBF16_K gotoblas -> sbdtobf16_k
|
||||||
#define SBF16TOS_K gotoblas -> sbf16tos_k
|
#define SBF16TOS_K gotoblas -> sbf16tos_k
|
||||||
#define DBF16TOD_K gotoblas -> dbf16tod_k
|
#define DBF16TOD_K gotoblas -> dbf16tod_k
|
||||||
|
#define SBGEMV_N_K gotoblas -> sbgemv_n
|
||||||
|
#define SBGEMV_T_K gotoblas -> sbgemv_t
|
||||||
|
|
||||||
#define SBGEMM_ONCOPY gotoblas -> sbgemm_oncopy
|
#define SBGEMM_ONCOPY gotoblas -> sbgemm_oncopy
|
||||||
#define SBGEMM_OTCOPY gotoblas -> sbgemm_otcopy
|
#define SBGEMM_OTCOPY gotoblas -> sbgemm_otcopy
|
||||||
|
|
|
@ -78,6 +78,12 @@ static __inline unsigned long rpcc(void){
|
||||||
#define __BIG_ENDIAN__
|
#define __BIG_ENDIAN__
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef C_SUN
|
||||||
|
#ifndef __64BIT
|
||||||
|
#define RETURN_BY_STACK
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory")
|
#define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory")
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -424,7 +424,7 @@ void get_cpuconfig(void)
|
||||||
sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0);
|
sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0);
|
||||||
printf("#define L1_DATA_SIZE %d \n",value);
|
printf("#define L1_DATA_SIZE %d \n",value);
|
||||||
sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0);
|
sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0);
|
||||||
printf("#define L2_DATA_SIZE %d \n",value);
|
printf("#define L2_SIZE %d \n",value);
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue