Merge branch 'develop' into risc-v
This commit is contained in:
commit
913cc9a4ca
|
@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
|
||||||
project(OpenBLAS C ASM)
|
project(OpenBLAS C ASM)
|
||||||
set(OpenBLAS_MAJOR_VERSION 0)
|
set(OpenBLAS_MAJOR_VERSION 0)
|
||||||
set(OpenBLAS_MINOR_VERSION 3)
|
set(OpenBLAS_MINOR_VERSION 3)
|
||||||
set(OpenBLAS_PATCH_VERSION 10.dev)
|
set(OpenBLAS_PATCH_VERSION 12.dev)
|
||||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||||
|
|
||||||
# Adhere to GNU filesystem layout conventions
|
# Adhere to GNU filesystem layout conventions
|
||||||
|
|
|
@ -1,4 +1,102 @@
|
||||||
OpenBLAS ChangeLog
|
OpenBLAS ChangeLog
|
||||||
|
====================================================================
|
||||||
|
Version 0.3.12
|
||||||
|
24-Oct-2020
|
||||||
|
|
||||||
|
common:
|
||||||
|
* Fixed missing BLAS/LAPACK functions (inadvertently dropped during
|
||||||
|
the build system restructuring)
|
||||||
|
* Fixed argument conversion macro in LAPACKE_zgesvdq (LAPACK #458)
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Added optimized SCOPY/CCOPY kernels for POWER10
|
||||||
|
* Increased and unified the default size of the GEMM BUFFER
|
||||||
|
* Fixed building for POWER10 in DYNAMIC_ARCH mode
|
||||||
|
* POWER10 compatibility test now checks binutils version as well
|
||||||
|
* Cleaned up compiler warnings
|
||||||
|
|
||||||
|
x86_64:
|
||||||
|
* corrected compiler version checks for AVX2 compatibility
|
||||||
|
* added compiler option -mavx2 for building with flang
|
||||||
|
* fixed direct SGEMM pathway for small matrix sizes (broken by
|
||||||
|
the code refactoring in 0.3.11)
|
||||||
|
* fixed unhandled partial register clobbers in several kernels
|
||||||
|
for AXPY,DOT,GEMV_N and GEMV_T flagged by gcc10 tree-vectorizer
|
||||||
|
|
||||||
|
ARMV8:
|
||||||
|
* improved Apple Vortex support to include cross-compiling
|
||||||
|
|
||||||
|
====================================================================
|
||||||
|
Version 0.3.11
|
||||||
|
17-Oct-2020
|
||||||
|
|
||||||
|
common:
|
||||||
|
* API change:
|
||||||
|
the newly added BFLOAT16 functions were renamed to use the
|
||||||
|
letter "B" instead of "H" to avoid potential confusion with
|
||||||
|
the IEEE "half precision float" type, i.e. the 0.3.10
|
||||||
|
SHGEMM is now SBGEMM and the corresponding build option
|
||||||
|
was changed from "BUILD_HALF" to "BUILD_BFLOAT16".
|
||||||
|
* Reduced the default BLAS3_MEM_ALLOC_THRESHOLD (used as an upper
|
||||||
|
limit for placing temporary arrays on the stack) to be compatible
|
||||||
|
with a stack size of 1 MB (as imposed by the Java runtime library)
|
||||||
|
* Added mixed-precision dot function SBDOT and utility functions
|
||||||
|
shstobf16, shdtobf16, sbf16tos and dbf16tod to convert between
|
||||||
|
single or double precision float arrays and bfloat16 arrays
|
||||||
|
* Fixed prototypes of LAPACK_?ggsvp and LAPACK_?ggsvd functions
|
||||||
|
in lapack.h
|
||||||
|
* Fixed underflow and rounding errors in LAPACK SLANV2 and DLANV2
|
||||||
|
(causing miscalculations in e.g. SHSEQR/DHSEQR, LAPACK issue #263)
|
||||||
|
* Fixed workspace calculation in LAPACK ?GELQ (LAPACK issue #415)
|
||||||
|
* Fixed several bugs in the LAPACK testsuite
|
||||||
|
* Improved performance of TRMM and TRSM for certain problem sizes
|
||||||
|
* Fixed infinite recursions and workspace miscalculations in ReLAPACK
|
||||||
|
* CMAKE builds no longer require pkg-config for creating the .pc file
|
||||||
|
* Makefile builds no longer misread NO_CBLAS=0 or NO_LAPACK=0 as
|
||||||
|
enabling these options
|
||||||
|
* Fixed detection of gfortran when invoked through an mpi wrapper
|
||||||
|
* Improved thread reinitialization performance with OpenMP after a fork
|
||||||
|
* Added support for building only the subset of the library required
|
||||||
|
for a particular precision by specifying BUILD_SINGLE, BUILD_DOUBLE
|
||||||
|
* Optional function name prefixes and suffixes are now correctly
|
||||||
|
reflected in the generated cblas.h
|
||||||
|
* Added CMAKE build support for the LAPACK and multithreading tests
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Added optimized support for POWER10
|
||||||
|
* Added support for compiling for POWER8 in 32bit mode
|
||||||
|
* Added support for compilation with LLVM/clang
|
||||||
|
* Added support for compilation with NVIDIA/PGI compilers
|
||||||
|
* Fixed building on big-endian POWER8
|
||||||
|
* Fixed miscompilation of ZDOTC by gcc10
|
||||||
|
* Fixed alignment errors in the POWER8 SAXPY kernel
|
||||||
|
* Improved CPU detection on AIX
|
||||||
|
* Supported building with older compilers on POWER9
|
||||||
|
|
||||||
|
x86_64:
|
||||||
|
* Added support for Intel Cooperlake
|
||||||
|
* Added autodetection of AMD Renoir/Matisse/Zen3 CPUs
|
||||||
|
* Added autodetection of Intel Comet Lake CPUs
|
||||||
|
* Reimplemented ?sum, ?dot and daxpy using universal intrinsics
|
||||||
|
* Reset the FPU state before using the FPU on Windows as a workaround
|
||||||
|
for a problem introduced in Windows 10 build 19041 (a.k.a. SDK 2004)
|
||||||
|
* Fixed potentially undefined behaviour in the dot and gemv_t kernels
|
||||||
|
* Fixed a potential segmentation fault in DYNAMIC_ARCH builds
|
||||||
|
* Fixed building for ZEN with PGI/NVIDIA and AMD AOCC compilers
|
||||||
|
|
||||||
|
ARMV7:
|
||||||
|
* Fixed CPU detection on BSD-like systems
|
||||||
|
|
||||||
|
ARMV8:
|
||||||
|
* Added preliminary support for Apple Vortex CPUs
|
||||||
|
* Added support for the Cavium ThunderX3T110 CPU
|
||||||
|
* Fixed CPU detection on BSD-like systems
|
||||||
|
* Fixed compilation in -std=c18 mode
|
||||||
|
|
||||||
|
IBM Z:
|
||||||
|
* Added support for compiling with the clang compiler
|
||||||
|
* Improved GEMM performance on Z14
|
||||||
|
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.3.10
|
Version 0.3.10
|
||||||
14-Jun-2020
|
14-Jun-2020
|
||||||
|
|
|
@ -12,3 +12,8 @@ ifeq ($(CORE), ARMV6)
|
||||||
CCOMMON_OPT += -mfpu=vfp
|
CCOMMON_OPT += -mfpu=vfp
|
||||||
FCOMMON_OPT += -mfpu=vfp
|
FCOMMON_OPT += -mfpu=vfp
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifdef HAVE_NEON
|
||||||
|
CCOMMON_OPT += -mfpu=neon
|
||||||
|
FCOMMON_OPT += -mfpu=neon
|
||||||
|
endif
|
||||||
|
|
|
@ -10,7 +10,7 @@ USE_OPENMP = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), POWER10)
|
ifeq ($(CORE), POWER10)
|
||||||
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
|
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
|
||||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
# This library's version
|
# This library's version
|
||||||
VERSION = 0.3.10.dev
|
VERSION = 0.3.12.dev
|
||||||
|
|
||||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||||
|
@ -295,10 +295,13 @@ COMMON_PROF = -pg
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# the below is not yet configurable, use cmake if you need to build only select types
|
# By default the library contains BLAS functions (and LAPACK if selected) for all input types.
|
||||||
BUILD_SINGLE = 1
|
# To build a smaller library supporting e.g. only single precision real (SGEMM etc.) or only
|
||||||
BUILD_DOUBLE = 1
|
# the functions for complex numbers, uncomment the desired type(s) below
|
||||||
BUILD_COMPLEX = 1
|
# BUILD_SINGLE = 1
|
||||||
BUILD_COMPLEX16 = 1
|
# BUILD_DOUBLE = 1
|
||||||
|
# BUILD_COMPLEX = 1
|
||||||
|
# BUILD_COMPLEX16 = 1
|
||||||
|
#
|
||||||
# End of user configuration
|
# End of user configuration
|
||||||
#
|
#
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
INCLUDED = 1
|
INCLUDED = 1
|
||||||
|
|
||||||
ifndef TOPDIR
|
ifndef TOPDIR
|
||||||
TOPDIR = .
|
TOPDIR = .
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# If ARCH is not set, we use the host system's architecture for getarch compile options.
|
# If ARCH is not set, we use the host system's architecture for getarch compile options.
|
||||||
|
@ -93,6 +93,12 @@ endif
|
||||||
ifdef TARGET
|
ifdef TARGET
|
||||||
GETARCH_FLAGS := -DFORCE_$(TARGET)
|
GETARCH_FLAGS := -DFORCE_$(TARGET)
|
||||||
GETARCH_FLAGS += -DUSER_TARGET
|
GETARCH_FLAGS += -DUSER_TARGET
|
||||||
|
ifeq ($(TARGET), GENERIC)
|
||||||
|
ifeq ($(DYNAMIC_ARCH), 1)
|
||||||
|
override NO_EXPRECISION=1
|
||||||
|
export NO_EXPRECiSION
|
||||||
|
endif
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
# Force fallbacks for 32bit
|
# Force fallbacks for 32bit
|
||||||
|
@ -246,6 +252,22 @@ DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)"
|
||||||
ifndef TARGET_CORE
|
ifndef TARGET_CORE
|
||||||
include $(TOPDIR)/Makefile.conf
|
include $(TOPDIR)/Makefile.conf
|
||||||
else
|
else
|
||||||
|
HAVE_NEON=
|
||||||
|
HAVE_VFP=
|
||||||
|
HAVE_VFPV3=
|
||||||
|
HAVE_VFPV4=
|
||||||
|
HAVE_MMX=
|
||||||
|
HAVE_SSE=
|
||||||
|
HAVE_SSE2=
|
||||||
|
HAVE_SSE3=
|
||||||
|
HAVE_SSSE3=
|
||||||
|
HAVE_SSE4_1=
|
||||||
|
HAVE_SSE4_2=
|
||||||
|
HAVE_SSE4A=
|
||||||
|
HAVE_SSE5=
|
||||||
|
HAVE_AVX=
|
||||||
|
HAVE_AVX2=
|
||||||
|
HAVE_FMA3=
|
||||||
include $(TOPDIR)/Makefile_kernel.conf
|
include $(TOPDIR)/Makefile_kernel.conf
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -319,6 +341,7 @@ ifeq ($(GCCVERSIONGTEQ7),1)
|
||||||
else
|
else
|
||||||
GCCDUMPVERSION_PARAM := -dumpversion
|
GCCDUMPVERSION_PARAM := -dumpversion
|
||||||
endif
|
endif
|
||||||
|
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 1)
|
||||||
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2)
|
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2)
|
||||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
|
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
|
||||||
endif
|
endif
|
||||||
|
@ -641,6 +664,7 @@ DYNAMIC_CORE += POWER8
|
||||||
ifneq ($(C_COMPILER), GCC)
|
ifneq ($(C_COMPILER), GCC)
|
||||||
DYNAMIC_CORE += POWER9
|
DYNAMIC_CORE += POWER9
|
||||||
DYNAMIC_CORE += POWER10
|
DYNAMIC_CORE += POWER10
|
||||||
|
CCOMMON_OPT += -DHAVE_P10_SUPPORT
|
||||||
endif
|
endif
|
||||||
ifeq ($(C_COMPILER), GCC)
|
ifeq ($(C_COMPILER), GCC)
|
||||||
ifeq ($(GCCVERSIONGT5), 1)
|
ifeq ($(GCCVERSIONGT5), 1)
|
||||||
|
@ -648,11 +672,14 @@ DYNAMIC_CORE += POWER9
|
||||||
else
|
else
|
||||||
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
|
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
|
||||||
endif
|
endif
|
||||||
ifeq ($(GCCVERSIONGTEQ11), 1)
|
LDVERSIONGTEQ35 := $(shell expr `ld --version | head -1 | cut -f2 -d "." | cut -f1 -d "-"` >= 35)
|
||||||
|
ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11)
|
||||||
DYNAMIC_CORE += POWER10
|
DYNAMIC_CORE += POWER10
|
||||||
|
CCOMMON_OPT += -DHAVE_P10_SUPPORT
|
||||||
else ifeq ($(GCCVERSIONGTEQ10), 1)
|
else ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||||
ifeq ($(GCCMINORVERSIONGTEQ2), 1)
|
ifeq ($(GCCMINORVERSIONGTEQ2)$(LDVERSIONGTEQ35), 11)
|
||||||
DYNAMIC_CORE += POWER10
|
DYNAMIC_CORE += POWER10
|
||||||
|
CCOMMON_OPT += -DHAVE_P10_SUPPORT
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
$(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
|
$(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
|
||||||
|
@ -853,7 +880,7 @@ CCOMMON_OPT += -DF_INTERFACE_FLANG
|
||||||
FCOMMON_OPT += -Mrecursive -Kieee
|
FCOMMON_OPT += -Mrecursive -Kieee
|
||||||
ifeq ($(OSNAME), Linux)
|
ifeq ($(OSNAME), Linux)
|
||||||
ifeq ($(ARCH), x86_64)
|
ifeq ($(ARCH), x86_64)
|
||||||
FLANG_VENDOR := $(shell expr `$(FC) --version|cut -f 1 -d "."|head -1`)
|
FLANG_VENDOR := $(shell `$(FC) --version|cut -f 1 -d "."|head -1`)
|
||||||
ifeq ($(FLANG_VENDOR),AOCC)
|
ifeq ($(FLANG_VENDOR),AOCC)
|
||||||
FCOMMON_OPT += -fno-unroll-loops
|
FCOMMON_OPT += -fno-unroll-loops
|
||||||
endif
|
endif
|
||||||
|
@ -1515,6 +1542,8 @@ export HAVE_SSE4_2
|
||||||
export HAVE_SSE4A
|
export HAVE_SSE4A
|
||||||
export HAVE_SSE5
|
export HAVE_SSE5
|
||||||
export HAVE_AVX
|
export HAVE_AVX
|
||||||
|
export HAVE_AVX2
|
||||||
|
export HAVE_FMA3
|
||||||
export HAVE_VFP
|
export HAVE_VFP
|
||||||
export HAVE_VFPV3
|
export HAVE_VFPV3
|
||||||
export HAVE_VFPV4
|
export HAVE_VFPV4
|
||||||
|
|
|
@ -9,9 +9,9 @@ endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef HAVE_SSE3
|
ifdef HAVE_SSE3
|
||||||
ifndef DYNAMIC_ARCH
|
|
||||||
CCOMMON_OPT += -msse3
|
CCOMMON_OPT += -msse3
|
||||||
FCOMMON_OPT += -msse3
|
FCOMMON_OPT += -msse3
|
||||||
|
endif
|
||||||
ifdef HAVE_SSSE3
|
ifdef HAVE_SSSE3
|
||||||
CCOMMON_OPT += -mssse3
|
CCOMMON_OPT += -mssse3
|
||||||
FCOMMON_OPT += -mssse3
|
FCOMMON_OPT += -mssse3
|
||||||
|
@ -20,7 +20,17 @@ ifdef HAVE_SSE4_1
|
||||||
CCOMMON_OPT += -msse4.1
|
CCOMMON_OPT += -msse4.1
|
||||||
FCOMMON_OPT += -msse4.1
|
FCOMMON_OPT += -msse4.1
|
||||||
endif
|
endif
|
||||||
|
ifdef HAVE_AVX
|
||||||
|
CCOMMON_OPT += -mavx
|
||||||
|
FCOMMON_OPT += -mavx
|
||||||
endif
|
endif
|
||||||
|
ifdef HAVE_AVX2
|
||||||
|
CCOMMON_OPT += -mavx2
|
||||||
|
FCOMMON_OPT += -mavx2
|
||||||
|
endif
|
||||||
|
ifdef HAVE_FMA3
|
||||||
|
CCOMMON_OPT += -mfma
|
||||||
|
FCOMMON_OPT += -mfma
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), SKYLAKEX)
|
ifeq ($(CORE), SKYLAKEX)
|
||||||
|
@ -47,8 +57,6 @@ ifndef DYNAMIC_ARCH
|
||||||
ifndef NO_AVX512
|
ifndef NO_AVX512
|
||||||
ifeq ($(C_COMPILER), GCC)
|
ifeq ($(C_COMPILER), GCC)
|
||||||
# cooperlake support was added in 10.1
|
# cooperlake support was added in 10.1
|
||||||
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
|
|
||||||
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 1)
|
|
||||||
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
|
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
|
||||||
CCOMMON_OPT += -march=cooperlake
|
CCOMMON_OPT += -march=cooperlake
|
||||||
FCOMMON_OPT += -march=cooperlake
|
FCOMMON_OPT += -march=cooperlake
|
||||||
|
@ -68,24 +76,31 @@ endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE))
|
ifdef HAVE_AVX2
|
||||||
ifndef DYNAMIC_ARCH
|
|
||||||
ifndef NO_AVX2
|
ifndef NO_AVX2
|
||||||
ifeq ($(C_COMPILER), GCC)
|
ifeq ($(C_COMPILER), GCC)
|
||||||
# AVX2 support was added in 4.7.0
|
# AVX2 support was added in 4.7.0
|
||||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
|
||||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||||
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
|
CCOMMON_OPT += -mavx2
|
||||||
|
endif
|
||||||
|
else
|
||||||
|
ifeq ($(C_COMPILER), CLANG)
|
||||||
CCOMMON_OPT += -mavx2
|
CCOMMON_OPT += -mavx2
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
ifeq ($(F_COMPILER), GFORTRAN)
|
ifeq ($(F_COMPILER), GFORTRAN)
|
||||||
# AVX2 support was added in 4.7.0
|
# AVX2 support was added in 4.7.0
|
||||||
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
|
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||||
|
GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5)
|
||||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
|
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||||
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
|
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
|
||||||
|
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||||
FCOMMON_OPT += -mavx2
|
FCOMMON_OPT += -mavx2
|
||||||
endif
|
endif
|
||||||
|
else
|
||||||
|
ifeq ($(F_COMPILER), FLANG)
|
||||||
|
FCOMMON_OPT += -mavx2
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
170
benchmark/amax.c
170
benchmark/amax.c
|
@ -25,125 +25,73 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef AMAX
|
#undef AMAX
|
||||||
|
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define AMAX BLASFUNC(dzamax)
|
#define AMAX BLASFUNC(dzamax)
|
||||||
#else
|
#else
|
||||||
#define AMAX BLASFUNC(scamax)
|
#define AMAX BLASFUNC(scamax)
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define AMAX BLASFUNC(damax)
|
#define AMAX BLASFUNC(damax)
|
||||||
#else
|
#else
|
||||||
#define AMAX BLASFUNC(samax)
|
#define AMAX BLASFUNC(samax)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
blasint m, i;
|
blasint m, i;
|
||||||
blasint inc_x=1;
|
blasint inc_x = 1;
|
||||||
int loops = 1;
|
int loops = 1;
|
||||||
int l;
|
int l;
|
||||||
char *p;
|
char *p;
|
||||||
|
|
||||||
|
int from = 1;
|
||||||
|
int to = 200;
|
||||||
|
int step = 1;
|
||||||
|
|
||||||
int from = 1;
|
double time1, timeg;
|
||||||
int to = 200;
|
|
||||||
int step = 1;
|
|
||||||
|
|
||||||
struct timeval start, stop;
|
argc--;
|
||||||
double time1,timeg;
|
argv++;
|
||||||
|
|
||||||
argc--;argv++;
|
if (argc > 0)
|
||||||
|
{
|
||||||
|
from = atol(*argv);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
if (argc > 0)
|
||||||
|
{
|
||||||
|
to = MAX(atol(*argv), from);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
if (argc > 0)
|
||||||
|
{
|
||||||
|
step = atol(*argv);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
|
||||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
loops = atoi(p);
|
||||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
if ((p = getenv("OPENBLAS_INCX")))
|
||||||
|
inc_x = atoi(p);
|
||||||
|
|
||||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
|
||||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
|
||||||
|
|
||||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
|
||||||
|
{
|
||||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
fprintf(stderr, "Out of Memory!!\n");
|
||||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __linux
|
#ifdef __linux
|
||||||
|
@ -152,37 +100,31 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
fprintf(stderr, " SIZE Flops\n");
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
for(m = from; m <= to; m += step)
|
for (m = from; m <= to; m += step)
|
||||||
{
|
{
|
||||||
|
|
||||||
timeg=0;
|
timeg = 0;
|
||||||
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
fprintf(stderr, " %6d : ", (int)m);
|
for (l = 0; l < loops; l++)
|
||||||
|
{
|
||||||
|
|
||||||
|
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
|
||||||
|
{
|
||||||
|
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
for (l=0; l<loops; l++)
|
begin();
|
||||||
{
|
AMAX(&m, x, &inc_x);
|
||||||
|
end();
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
timeg += getsec();
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
|
||||||
}
|
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
|
||||||
AMAX (&m, x, &inc_x);
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops %10.6f sec\n",
|
" %10.2f MFlops %10.6f sec\n",
|
||||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
167
benchmark/amin.c
167
benchmark/amin.c
|
@ -25,124 +25,73 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef AMIN
|
#undef AMIN
|
||||||
|
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define AMIN BLASFUNC(dzamin)
|
#define AMIN BLASFUNC(dzamin)
|
||||||
#else
|
#else
|
||||||
#define AMIN BLASFUNC(scamin)
|
#define AMIN BLASFUNC(scamin)
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define AMIN BLASFUNC(damin)
|
#define AMIN BLASFUNC(damin)
|
||||||
#else
|
#else
|
||||||
#define AMIN BLASFUNC(samin)
|
#define AMIN BLASFUNC(samin)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
blasint m, i;
|
blasint m, i;
|
||||||
blasint inc_x=1;
|
blasint inc_x = 1;
|
||||||
int loops = 1;
|
int loops = 1;
|
||||||
int l;
|
int l;
|
||||||
char *p;
|
char *p;
|
||||||
|
|
||||||
int from = 1;
|
int from = 1;
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
double time1, timeg;
|
||||||
double time1,timeg;
|
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;
|
||||||
|
argv++;
|
||||||
|
|
||||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
if (argc > 0)
|
||||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
{
|
||||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
from = atol(*argv);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
if (argc > 0)
|
||||||
|
{
|
||||||
|
to = MAX(atol(*argv), from);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
if (argc > 0)
|
||||||
|
{
|
||||||
|
step = atol(*argv);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
|
||||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX")))
|
||||||
|
inc_x = atoi(p);
|
||||||
|
|
||||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
|
||||||
|
|
||||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
|
||||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
{
|
||||||
|
fprintf(stderr, "Out of Memory!!\n");
|
||||||
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __linux
|
#ifdef __linux
|
||||||
|
@ -151,39 +100,35 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
fprintf(stderr, " SIZE Flops\n");
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
for(m = from; m <= to; m += step)
|
for (m = from; m <= to; m += step)
|
||||||
{
|
{
|
||||||
|
|
||||||
timeg=0;
|
timeg = 0;
|
||||||
|
|
||||||
fprintf(stderr, " %6d : ", (int)m);
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
|
for (l = 0; l < loops; l++)
|
||||||
|
{
|
||||||
|
|
||||||
for (l=0; l<loops; l++)
|
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
|
||||||
{
|
{
|
||||||
|
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
begin();
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
|
||||||
}
|
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
AMIN(&m, x, &inc_x);
|
||||||
|
|
||||||
AMIN (&m, x, &inc_x);
|
end();
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
|
timeg += getsec();
|
||||||
}
|
}
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops %10.6f sec\n",
|
" %10.2f MFlops %10.6f sec\n",
|
||||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
184
benchmark/asum.c
184
benchmark/asum.c
|
@ -25,132 +25,74 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef ASUM
|
#undef ASUM
|
||||||
|
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define ASUM BLASFUNC(dzasum)
|
#define ASUM BLASFUNC(dzasum)
|
||||||
#else
|
#else
|
||||||
#define ASUM BLASFUNC(scasum)
|
#define ASUM BLASFUNC(scasum)
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define ASUM BLASFUNC(dasum)
|
#define ASUM BLASFUNC(dasum)
|
||||||
#else
|
#else
|
||||||
#define ASUM BLASFUNC(sasum)
|
#define ASUM BLASFUNC(sasum)
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
FLOAT result;
|
FLOAT result;
|
||||||
blasint m, i;
|
blasint m, i;
|
||||||
blasint inc_x=1;
|
blasint inc_x = 1;
|
||||||
int loops = 1;
|
int loops = 1;
|
||||||
int l;
|
int l;
|
||||||
char *p;
|
char *p;
|
||||||
|
|
||||||
int from = 1;
|
int from = 1;
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
|
||||||
#else
|
|
||||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
|
||||||
double time1, timeg;
|
double time1, timeg;
|
||||||
#endif
|
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;
|
||||||
|
argv++;
|
||||||
|
|
||||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
if (argc > 0)
|
||||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
{
|
||||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
from = atol(*argv);
|
||||||
|
argc--;
|
||||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
argv++;
|
||||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
}
|
||||||
|
if (argc > 0)
|
||||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
{
|
||||||
|
to = MAX(atol(*argv), from);
|
||||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
argc--;
|
||||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
argv++;
|
||||||
|
}
|
||||||
|
if (argc > 0)
|
||||||
|
{
|
||||||
|
step = atol(*argv);
|
||||||
|
argc--;
|
||||||
|
argv++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||||
|
loops = atoi(p);
|
||||||
|
if ((p = getenv("OPENBLAS_INCX")))
|
||||||
|
inc_x = atoi(p);
|
||||||
|
|
||||||
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
|
||||||
|
|
||||||
|
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
|
||||||
|
{
|
||||||
|
fprintf(stderr, "Out of Memory!!\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __linux
|
#ifdef __linux
|
||||||
srandom(getpid());
|
srandom(getpid());
|
||||||
|
@ -158,45 +100,33 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
fprintf(stderr, " SIZE Flops\n");
|
fprintf(stderr, " SIZE Flops\n");
|
||||||
|
|
||||||
for(m = from; m <= to; m += step)
|
for (m = from; m <= to; m += step)
|
||||||
{
|
{
|
||||||
|
|
||||||
timeg=0;
|
timeg = 0;
|
||||||
|
|
||||||
fprintf(stderr, " %6d : ", (int)m);
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
for (l=0; l<loops; l++)
|
for (l = 0; l < loops; l++)
|
||||||
{
|
{
|
||||||
|
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
|
||||||
}
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
|
||||||
#else
|
|
||||||
clock_gettime(CLOCK_REALTIME, &start);
|
|
||||||
#endif
|
|
||||||
result = ASUM (&m, x, &inc_x);
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
|
||||||
clock_gettime(CLOCK_REALTIME, &stop);
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
#else
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
|
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
|
||||||
|
{
|
||||||
|
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
|
||||||
|
}
|
||||||
|
begin();
|
||||||
|
result = ASUM(&m, x, &inc_x);
|
||||||
|
end();
|
||||||
|
timeg += getsec();
|
||||||
}
|
}
|
||||||
if (loops >1)
|
if (loops > 1)
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
|
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
|
||||||
#else
|
#else
|
||||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
|
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef AXPBY
|
#undef AXPBY
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -129,7 +58,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -176,16 +104,10 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for (l=0; l<loops; l++)
|
for (l=0; l<loops; l++)
|
||||||
{
|
{
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y );
|
AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y );
|
||||||
|
end();
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
timeg += getsec();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef AXPY
|
#undef AXPY
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -127,8 +56,6 @@ int main(int argc, char *argv[]){
|
||||||
int from = 1;
|
int from = 1;
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timespec start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -175,13 +102,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
clock_gettime( CLOCK_REALTIME, &start);
|
begin();
|
||||||
|
|
||||||
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
|
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
|
||||||
|
|
||||||
clock_gettime( CLOCK_REALTIME, &stop);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,104 @@
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <time.h>
|
||||||
|
#ifdef __CYGWIN32__
|
||||||
|
#include <sys/time.h>
|
||||||
|
#endif
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__)
|
||||||
|
|
||||||
|
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||||
|
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int gettimeofday(struct timeval *tv, void *tz){
|
||||||
|
|
||||||
|
FILETIME ft;
|
||||||
|
unsigned __int64 tmpres = 0;
|
||||||
|
static int tzflag;
|
||||||
|
|
||||||
|
if (NULL != tv)
|
||||||
|
{
|
||||||
|
GetSystemTimeAsFileTime(&ft);
|
||||||
|
|
||||||
|
tmpres |= ft.dwHighDateTime;
|
||||||
|
tmpres <<= 32;
|
||||||
|
tmpres |= ft.dwLowDateTime;
|
||||||
|
|
||||||
|
/*converting file time to unix epoch*/
|
||||||
|
tmpres /= 10; /*convert into microseconds*/
|
||||||
|
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||||
|
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||||
|
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||||
|
|
||||||
|
static void *huge_malloc(BLASLONG size){
|
||||||
|
int shmid;
|
||||||
|
void *address;
|
||||||
|
|
||||||
|
#ifndef SHM_HUGETLB
|
||||||
|
#define SHM_HUGETLB 04000
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if ((shmid =shmget(IPC_PRIVATE,
|
||||||
|
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||||
|
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||||
|
printf( "Memory allocation failed(shmget).\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
address = shmat(shmid, NULL, SHM_RND);
|
||||||
|
|
||||||
|
if ((BLASLONG)address == -1){
|
||||||
|
printf( "Memory allocation failed(shmat).\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
shmctl(shmid, IPC_RMID, 0);
|
||||||
|
|
||||||
|
return address;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#define malloc huge_malloc
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||||
|
struct timeval start, stop;
|
||||||
|
#else
|
||||||
|
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
||||||
|
#endif
|
||||||
|
|
||||||
|
double getsec()
|
||||||
|
{
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||||
|
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
#else
|
||||||
|
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void begin() {
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||||
|
gettimeofday( &start, (struct timezone *)0);
|
||||||
|
#else
|
||||||
|
clock_gettime(CLOCK_REALTIME, &start);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void end() {
|
||||||
|
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||||
|
gettimeofday( &stop, (struct timezone *)0);
|
||||||
|
#else
|
||||||
|
clock_gettime(CLOCK_REALTIME, &stop);
|
||||||
|
#endif
|
||||||
|
}
|
|
@ -36,12 +36,7 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
double fabs(double);
|
double fabs(double);
|
||||||
|
|
||||||
|
@ -71,41 +66,6 @@ double fabs(double);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
static __inline double getmflops(int ratio, int m, double secs){
|
static __inline double getmflops(int ratio, int m, double secs){
|
||||||
|
|
||||||
double mm = (double)m;
|
double mm = (double)m;
|
||||||
|
@ -145,7 +105,6 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT maxerr;
|
FLOAT maxerr;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -220,20 +179,19 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
POTRF(uplo[uplos], &m, b, &m, &info);
|
POTRF(uplo[uplos], &m, b, &m, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info != 0) {
|
if (info != 0) {
|
||||||
fprintf(stderr, "Info = %d\n", info);
|
fprintf(stderr, "Info = %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
maxerr = 0.;
|
|
||||||
|
|
||||||
if (!(uplos & 1)) {
|
if (!(uplos & 1)) {
|
||||||
for (j = 0; j < m; j++) {
|
for (j = 0; j < m; j++) {
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef COPY
|
#undef COPY
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -128,11 +57,9 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1 = 0.0, timeg = 0.0;
|
double time1 = 0.0, timeg = 0.0;
|
||||||
long nanos = 0;
|
long nanos = 0;
|
||||||
time_t seconds = 0;
|
time_t seconds = 0;
|
||||||
struct timespec time_start = { 0, 0 }, time_end = { 0, 0 };
|
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
|
||||||
|
@ -176,15 +103,10 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for (l=0; l<loops; l++)
|
for (l=0; l<loops; l++)
|
||||||
{
|
{
|
||||||
clock_gettime(CLOCK_REALTIME, &time_start);
|
begin();
|
||||||
COPY (&m, x, &inc_x, y, &inc_y );
|
COPY (&m, x, &inc_x, y, &inc_y );
|
||||||
clock_gettime(CLOCK_REALTIME, &time_end);
|
end();
|
||||||
|
timeg += getsec();
|
||||||
nanos = time_end.tv_nsec - time_start.tv_nsec;
|
|
||||||
seconds = time_end.tv_sec - time_start.tv_sec;
|
|
||||||
|
|
||||||
time1 = seconds + nanos / 1.e9;
|
|
||||||
timeg += time1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef DOT
|
#undef DOT
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define DOT BLASFUNC(ddot)
|
#define DOT BLASFUNC(ddot)
|
||||||
#else
|
#else
|
||||||
#define DOT BLASFUNC(sdot)
|
#define DOT BLASFUNC(sdot)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -122,7 +49,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -169,15 +95,12 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
result = DOT (&m, x, &inc_x, y, &inc_y );
|
result = DOT (&m, x, &inc_x, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
timeg += getsec();
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -36,13 +36,7 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef GEEV
|
#undef GEEV
|
||||||
|
|
||||||
|
@ -74,71 +68,6 @@ extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a,
|
||||||
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info );
|
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info );
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
|
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
|
||||||
|
@ -154,7 +83,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -223,7 +151,7 @@ int main(int argc, char *argv[]){
|
||||||
for(m = from; m <= to; m += step){
|
for(m = from; m <= to; m += step){
|
||||||
|
|
||||||
fprintf(stderr, " %6d : ", (int)m);
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
lwork = -1;
|
lwork = -1;
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
|
@ -239,14 +167,14 @@ int main(int argc, char *argv[]){
|
||||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info) {
|
if (info) {
|
||||||
fprintf(stderr, "failed to compute eigenvalues .. %d\n", info);
|
fprintf(stderr, "failed to compute eigenvalues .. %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops : %10.2f Sec : %d\n",
|
" %10.2f MFlops : %10.2f Sec : %d\n",
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef GEMM
|
#undef GEMM
|
||||||
|
|
||||||
|
@ -55,71 +49,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
IFLOAT *a, *b;
|
IFLOAT *a, *b;
|
||||||
|
@ -139,7 +68,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1, timeg;
|
double time1, timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -228,14 +156,14 @@ int main(int argc, char *argv[]){
|
||||||
ldc = m;
|
ldc = m;
|
||||||
|
|
||||||
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
|
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
for (j=0; j<loops; j++) {
|
for (j=0; j<loops; j++) {
|
||||||
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
|
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg = time1/loops;
|
timeg = time1/loops;
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef GEMM
|
#undef GEMM
|
||||||
|
|
||||||
|
@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b, *c;
|
FLOAT *a, *b, *c;
|
||||||
|
@ -133,7 +62,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -187,16 +115,12 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
timeg += getsec();
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef GEMV
|
#undef GEMV
|
||||||
|
@ -52,72 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -137,7 +66,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -211,10 +139,10 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -248,10 +176,10 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef GER
|
#undef GER
|
||||||
|
|
||||||
|
@ -49,72 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -131,7 +59,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -198,16 +125,13 @@ int main(int argc, char *argv[]){
|
||||||
for (l=0; l<loops; l++)
|
for (l=0; l<loops; l++)
|
||||||
{
|
{
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m);
|
GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
timeg += getsec();
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
|
|
|
@ -36,12 +36,7 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
double fabs(double);
|
double fabs(double);
|
||||||
|
|
||||||
|
@ -66,71 +61,6 @@ double fabs(double);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b;
|
FLOAT *a, *b;
|
||||||
|
@ -142,7 +72,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -194,22 +123,18 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
GESV (&m, &m, a, &m, ipiv, b, &m, &info);
|
GESV (&m, &m, a, &m, ipiv, b, &m, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
|
|
||||||
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
"%10.2f MFlops %10.6f s\n",
|
"%10.2f MFlops %10.6f s\n",
|
||||||
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
|
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -36,12 +36,7 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#undef GETRF
|
#undef GETRF
|
||||||
#undef GETRI
|
#undef GETRI
|
||||||
|
@ -72,71 +67,6 @@
|
||||||
|
|
||||||
extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info);
|
extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info);
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a,*work;
|
FLOAT *a,*work;
|
||||||
|
@ -148,7 +78,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -205,21 +134,21 @@ int main(int argc, char *argv[]){
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
lwork = -1;
|
lwork = -1;
|
||||||
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
|
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
|
||||||
|
|
||||||
lwork = (blasint)wkopt[0];
|
lwork = (blasint)wkopt[0];
|
||||||
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
|
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info) {
|
if (info) {
|
||||||
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
|
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops : %10.2f Sec : %d\n",
|
" %10.2f MFlops : %10.2f Sec : %d\n",
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HBMV
|
#undef HBMV
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define HBMV BLASFUNC(zhbmv)
|
#define HBMV BLASFUNC(zhbmv)
|
||||||
#else
|
#else
|
||||||
#define HBMV BLASFUNC(chbmv)
|
#define HBMV BLASFUNC(chbmv)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz) {
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size) {
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -125,7 +52,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -186,15 +112,13 @@ int main(int argc, char *argv[]){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
timeg += getsec();
|
||||||
|
|
||||||
timeg += time1;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HEMM
|
#undef HEMM
|
||||||
|
|
||||||
|
@ -41,72 +35,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define HEMM BLASFUNC(chemm)
|
#define HEMM BLASFUNC(chemm)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b, *c;
|
FLOAT *a, *b, *c;
|
||||||
|
@ -126,7 +54,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -170,13 +97,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HEMV
|
#undef HEMV
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define HEMV BLASFUNC(zhemv)
|
#define HEMV BLASFUNC(zhemv)
|
||||||
#else
|
#else
|
||||||
#define HEMV BLASFUNC(chemv)
|
#define HEMV BLASFUNC(chemv)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -124,7 +51,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -182,13 +108,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HER
|
#undef HER
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define HER BLASFUNC(zher)
|
#define HER BLASFUNC(zher)
|
||||||
#else
|
#else
|
||||||
#define HER BLASFUNC(cher)
|
#define HER BLASFUNC(cher)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x;
|
FLOAT *a, *x;
|
||||||
|
@ -126,8 +53,6 @@ int main(int argc, char *argv[]){
|
||||||
int from = 1;
|
int from = 1;
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -166,15 +91,13 @@ int main(int argc, char *argv[]){
|
||||||
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HER (&uplo, &m, alpha, x, &incx, a, &m );
|
HER (&uplo, &m, alpha, x, &incx, a, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HER2
|
#undef HER2
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define HER2 BLASFUNC(zher2)
|
#define HER2 BLASFUNC(zher2)
|
||||||
#else
|
#else
|
||||||
#define HER2 BLASFUNC(cher2)
|
#define HER2 BLASFUNC(cher2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -127,7 +54,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -169,16 +95,13 @@ int main(int argc, char *argv[]){
|
||||||
y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
|
|
||||||
HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m );
|
HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HER2K
|
#undef HER2K
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
|
@ -40,72 +34,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define HER2K BLASFUNC(cher2k)
|
#define HER2K BLASFUNC(cher2k)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b, *c;
|
FLOAT *a, *b, *c;
|
||||||
|
@ -125,7 +53,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -169,13 +96,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HERK
|
#undef HERK
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define HERK BLASFUNC(zherk)
|
#define HERK BLASFUNC(zherk)
|
||||||
#else
|
#else
|
||||||
#define HERK BLASFUNC(cherk)
|
#define HERK BLASFUNC(cherk)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *c;
|
FLOAT *a, *c;
|
||||||
|
@ -127,7 +54,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -167,18 +93,17 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef HPMV
|
#undef HPMV
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define HPMV BLASFUNC(zhpmv)
|
#define HPMV BLASFUNC(zhpmv)
|
||||||
#else
|
#else
|
||||||
#define HPMV BLASFUNC(chpmv)
|
#define HPMV BLASFUNC(chpmv)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz) {
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size) {
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -124,7 +51,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -183,13 +109,13 @@ int main(int argc, char *argv[]){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef IAMAX
|
#undef IAMAX
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -127,7 +56,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -166,13 +94,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
IAMAX (&m, x, &inc_x);
|
IAMAX (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef IAMIN
|
#undef IAMIN
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -127,7 +56,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -166,13 +94,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
IAMIN (&m, x, &inc_x);
|
IAMIN (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef IMAX
|
#undef IMAX
|
||||||
|
|
||||||
|
@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
IMAX (&m, x, &inc_x);
|
IMAX (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef IMIN
|
#undef IMIN
|
||||||
|
|
||||||
|
@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
IMIN (&m, x, &inc_x);
|
IMIN (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -36,12 +36,7 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
double fabs(double);
|
double fabs(double);
|
||||||
|
|
||||||
|
@ -72,71 +67,6 @@ double fabs(double);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b;
|
FLOAT *a, *b;
|
||||||
|
@ -151,7 +81,6 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT maxerr;
|
FLOAT maxerr;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1, time2;
|
double time1, time2;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -198,31 +127,31 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
GETRF (&m, &m, a, &m, ipiv, &info);
|
GETRF (&m, &m, a, &m, ipiv, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info) {
|
if (info) {
|
||||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info);
|
GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info) {
|
if (info) {
|
||||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time2 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time2 = getsec();
|
||||||
|
|
||||||
maxerr = 0.;
|
maxerr = 0.;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef NAMAX
|
#undef NAMAX
|
||||||
|
|
||||||
|
@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
NAMAX (&m, x, &inc_x);
|
NAMAX (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef NAMIN
|
#undef NAMIN
|
||||||
|
|
||||||
|
@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
NAMIN (&m, x, &inc_x);
|
NAMIN (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef NRM2
|
#undef NRM2
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x;
|
FLOAT *x;
|
||||||
|
@ -127,7 +56,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -166,13 +94,13 @@ int main(int argc, char *argv[]){
|
||||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
NRM2 (&m, x, &inc_x);
|
NRM2 (&m, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -36,12 +36,7 @@
|
||||||
/* or implied, of The University of Texas at Austin. */
|
/* or implied, of The University of Texas at Austin. */
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
double fabs(double);
|
double fabs(double);
|
||||||
|
|
||||||
|
@ -86,37 +81,7 @@ double fabs(double);
|
||||||
// extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info);
|
// extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info);
|
||||||
// extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info);
|
// extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info);
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
|
@ -141,7 +106,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -217,18 +181,18 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
POTRF(uplo[uplos], &m, b, &m, &info);
|
POTRF(uplo[uplos], &m, b, &m, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info != 0) {
|
if (info != 0) {
|
||||||
fprintf(stderr, "Potrf info = %d\n", info);
|
fprintf(stderr, "Potrf info = %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
|
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
|
||||||
|
|
||||||
if ( btest == 'S' )
|
if ( btest == 'S' )
|
||||||
|
@ -240,17 +204,17 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info);
|
POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info != 0) {
|
if (info != 0) {
|
||||||
fprintf(stderr, "Potrs info = %d\n", info);
|
fprintf(stderr, "Potrs info = %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
|
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -258,18 +222,18 @@ int main(int argc, char *argv[]){
|
||||||
if ( btest == 'I' )
|
if ( btest == 'I' )
|
||||||
{
|
{
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
POTRI(uplo[uplos], &m, b, &m, &info);
|
POTRI(uplo[uplos], &m, b, &m, &info);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
if (info != 0) {
|
if (info != 0) {
|
||||||
fprintf(stderr, "Potri info = %d\n", info);
|
fprintf(stderr, "Potri info = %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
|
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#undef ROT
|
#undef ROT
|
||||||
|
|
||||||
|
@ -52,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -133,7 +63,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -179,13 +108,13 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for (l=0; l<loops; l++)
|
for (l=0; l<loops; l++)
|
||||||
{
|
{
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
ROT (&m, x, &inc_x, y, &inc_y, c, s);
|
ROT (&m, x, &inc_x, y, &inc_y, c, s);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,7 @@ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
||||||
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#undef ROTM
|
#undef ROTM
|
||||||
|
|
||||||
|
@ -40,72 +35,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ROTM BLASFUNC(srotm)
|
#define ROTM BLASFUNC(srotm)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz)
|
|
||||||
{
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv) {
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size)
|
|
||||||
{
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =
|
|
||||||
shmget(IPC_PRIVATE, (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT | 0600)) < 0) {
|
|
||||||
printf("Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1) {
|
|
||||||
printf("Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -122,7 +51,7 @@ int main(int argc, char *argv[])
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1, timeg;
|
double time1, timeg;
|
||||||
|
|
||||||
argc--;
|
argc--;
|
||||||
|
@ -188,14 +117,13 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
for (l = 0; l < loops; l++) {
|
for (l = 0; l < loops; l++) {
|
||||||
gettimeofday(&start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
ROTM(&m, x, &inc_x, y, &inc_y, param);
|
ROTM(&m, x, &inc_x, y, &inc_y, param);
|
||||||
|
|
||||||
gettimeofday(&stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) +
|
time1 = getsec();
|
||||||
(double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SCAL
|
#undef SCAL
|
||||||
|
|
||||||
|
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -128,7 +57,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -174,13 +102,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SCAL (&m, alpha, x, &inc_x);
|
SCAL (&m, alpha, x, &inc_x);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,17 +25,10 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SPMV
|
#undef SPMV
|
||||||
|
|
||||||
|
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
|
@ -54,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -135,7 +63,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -193,13 +120,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SPR
|
#undef SPR
|
||||||
|
|
||||||
|
@ -41,73 +35,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SPR BLASFUNC(sspr)
|
#define SPR BLASFUNC(sspr)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a,*c;
|
FLOAT *a,*c;
|
||||||
|
@ -129,7 +56,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -173,13 +99,13 @@ int main(int argc, char *argv[]){
|
||||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SPR (&uplo, &m, alpha, c, &inc_x, a);
|
SPR (&uplo, &m, alpha, c, &inc_x, a);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SPR2
|
#undef SPR2
|
||||||
|
@ -42,72 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a,*b,*c;
|
FLOAT *a,*b,*c;
|
||||||
|
@ -129,7 +58,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -182,13 +110,13 @@ int main(int argc, char *argv[]){
|
||||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a);
|
SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SWAP
|
#undef SWAP
|
||||||
|
@ -49,71 +44,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -128,7 +58,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -175,13 +104,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SWAP (&m, x, &inc_x, y, &inc_y );
|
SWAP (&m, x, &inc_x, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SYMM
|
#undef SYMM
|
||||||
|
|
||||||
|
@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b, *c;
|
FLOAT *a, *b, *c;
|
||||||
|
@ -137,7 +66,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -181,13 +109,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SYMV
|
#undef SYMV
|
||||||
|
|
||||||
|
@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
|
@ -134,7 +63,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -192,13 +120,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SYMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
SYMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SYR
|
#undef SYR
|
||||||
|
@ -42,72 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x,*a;
|
FLOAT *x,*a;
|
||||||
|
@ -124,7 +53,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -165,13 +93,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SYR (&uplo, &m, alpha, x, &inc_x, a, &m );
|
SYR (&uplo, &m, alpha, x, &inc_x, a, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SYR2
|
#undef SYR2
|
||||||
|
|
||||||
|
@ -42,72 +36,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SYR2 BLASFUNC(ssyr2)
|
#define SYR2 BLASFUNC(ssyr2)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y, *a;
|
FLOAT *x, *y, *a;
|
||||||
|
@ -125,7 +53,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -174,13 +101,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m );
|
SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SYR2K
|
#undef SYR2K
|
||||||
|
@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b, *c;
|
FLOAT *a, *b, *c;
|
||||||
|
@ -137,7 +67,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -181,13 +110,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef SYRK
|
#undef SYRK
|
||||||
|
|
||||||
|
@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *c;
|
FLOAT *a, *c;
|
||||||
|
@ -137,7 +66,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -177,13 +105,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#undef TPMV
|
#undef TPMV
|
||||||
|
|
||||||
|
@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size)
|
|
||||||
{
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1) {
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -112,7 +73,6 @@ int main(int argc, char *argv[])
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
|
||||||
double time1, timeg;
|
double time1, timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -153,11 +113,11 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
for (l = 0; l < loops; l++) {
|
for (l = 0; l < loops; l++) {
|
||||||
clock_gettime(CLOCK_REALTIME, &start);
|
begin();
|
||||||
TPMV (&uplo, &trans, &diag, &n, a, x, &inc_x);
|
TPMV (&uplo, &trans, &diag, &n, a, x, &inc_x);
|
||||||
clock_gettime(CLOCK_REALTIME, &stop);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
|
time1 = getsec();
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#undef TPSV
|
#undef TPSV
|
||||||
|
|
||||||
|
@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size)
|
|
||||||
{
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1) {
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -112,7 +73,6 @@ int main(int argc, char *argv[])
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
|
||||||
double time1, timeg;
|
double time1, timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -153,11 +113,11 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
for (l = 0; l < loops; l++) {
|
for (l = 0; l < loops; l++) {
|
||||||
clock_gettime(CLOCK_REALTIME, &start);
|
begin();
|
||||||
TPSV (&uplo, &trans, &diag, &n, a, x, &inc_x);
|
TPSV (&uplo, &trans, &diag, &n, a, x, &inc_x);
|
||||||
clock_gettime(CLOCK_REALTIME, &stop);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
|
time1 = getsec();
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef TRMM
|
#undef TRMM
|
||||||
|
@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b;
|
FLOAT *a, *b;
|
||||||
|
@ -141,7 +71,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -180,13 +109,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops %10.6f sec\n",
|
" %10.2f MFlops %10.6f sec\n",
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#undef TRMV
|
#undef TRMV
|
||||||
|
|
||||||
|
@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size)
|
|
||||||
{
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1) {
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -112,7 +73,6 @@ int main(int argc, char *argv[])
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timespec start = { 0, 0 }, stop = { 0, 0 };
|
|
||||||
double time1, timeg;
|
double time1, timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -153,11 +113,11 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
for (l = 0; l < loops; l++) {
|
for (l = 0; l < loops; l++) {
|
||||||
clock_gettime(CLOCK_REALTIME, &start);
|
begin();
|
||||||
TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x);
|
TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x);
|
||||||
clock_gettime(CLOCK_REALTIME, &stop);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
|
time1 = getsec();
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef TRSM
|
#undef TRSM
|
||||||
|
@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *b;
|
FLOAT *a, *b;
|
||||||
|
@ -151,7 +81,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1;
|
double time1;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -196,13 +125,13 @@ int main(int argc, char *argv[]){
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,14 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include <time.h>
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef GEMV
|
#undef GEMV
|
||||||
#undef TRSV
|
#undef TRSV
|
||||||
|
@ -55,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *a, *x;
|
FLOAT *a, *x;
|
||||||
|
@ -133,7 +61,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timespec time_start, time_end;
|
|
||||||
time_t seconds = 0;
|
time_t seconds = 0;
|
||||||
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
@ -189,19 +116,13 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(l =0;l< loops;l++){
|
for(l =0;l< loops;l++){
|
||||||
|
|
||||||
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_start);
|
begin();
|
||||||
|
|
||||||
TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x);
|
TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x);
|
||||||
|
end();
|
||||||
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_end);
|
time1 = getsec();
|
||||||
nanos = time_end.tv_nsec - time_start.tv_nsec;
|
|
||||||
seconds = time_end.tv_sec - time_start.tv_sec;
|
|
||||||
|
|
||||||
time1 = seconds + nanos /1.e9;
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
timeg /= loops;
|
timeg /= loops;
|
||||||
long long muls = n*(n+1)/2.0;
|
long long muls = n*(n+1)/2.0;
|
||||||
long long adds = (n - 1.0)*n/2.0;
|
long long adds = (n - 1.0)*n/2.0;
|
||||||
|
|
|
@ -25,90 +25,18 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#define RETURN_BY_STACK 1
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
#define RETURN_BY_STACK 1
|
||||||
|
|
||||||
#undef DOT
|
#undef DOT
|
||||||
|
|
||||||
|
|
||||||
#ifdef DOUBLE
|
#ifdef DOUBLE
|
||||||
#define DOT BLASFUNC(zdotu)
|
#define DOT BLASFUNC(zdotu)
|
||||||
#else
|
#else
|
||||||
#define DOT BLASFUNC(cdotu)
|
#define DOT BLASFUNC(cdotu)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -123,7 +51,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -170,13 +97,13 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
|
|
||||||
DOT (&result, &m, x, &inc_x, y, &inc_y );
|
DOT (&result, &m, x, &inc_x, y, &inc_y );
|
||||||
|
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
|
@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include "bench.h"
|
||||||
#include <stdlib.h>
|
|
||||||
#ifdef __CYGWIN32__
|
|
||||||
#include <sys/time.h>
|
|
||||||
#endif
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
|
|
||||||
#undef DOT
|
#undef DOT
|
||||||
|
|
||||||
|
@ -42,72 +36,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define DOT BLASFUNC(cdotu)
|
#define DOT BLASFUNC(cdotu)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(__WIN32__) || defined(__WIN64__)
|
|
||||||
|
|
||||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
|
||||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int gettimeofday(struct timeval *tv, void *tz){
|
|
||||||
|
|
||||||
FILETIME ft;
|
|
||||||
unsigned __int64 tmpres = 0;
|
|
||||||
static int tzflag;
|
|
||||||
|
|
||||||
if (NULL != tv)
|
|
||||||
{
|
|
||||||
GetSystemTimeAsFileTime(&ft);
|
|
||||||
|
|
||||||
tmpres |= ft.dwHighDateTime;
|
|
||||||
tmpres <<= 32;
|
|
||||||
tmpres |= ft.dwLowDateTime;
|
|
||||||
|
|
||||||
/*converting file time to unix epoch*/
|
|
||||||
tmpres /= 10; /*convert into microseconds*/
|
|
||||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
|
||||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
|
||||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
|
||||||
|
|
||||||
static void *huge_malloc(BLASLONG size){
|
|
||||||
int shmid;
|
|
||||||
void *address;
|
|
||||||
|
|
||||||
#ifndef SHM_HUGETLB
|
|
||||||
#define SHM_HUGETLB 04000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((shmid =shmget(IPC_PRIVATE,
|
|
||||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
|
||||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
|
||||||
printf( "Memory allocation failed(shmget).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
address = shmat(shmid, NULL, SHM_RND);
|
|
||||||
|
|
||||||
if ((BLASLONG)address == -1){
|
|
||||||
printf( "Memory allocation failed(shmat).\n");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
shmctl(shmid, IPC_RMID, 0);
|
|
||||||
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define malloc huge_malloc
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int main(int argc, char *argv[]){
|
int main(int argc, char *argv[]){
|
||||||
|
|
||||||
FLOAT *x, *y;
|
FLOAT *x, *y;
|
||||||
|
@ -122,7 +50,6 @@ int main(int argc, char *argv[]){
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
|
||||||
struct timeval start, stop;
|
|
||||||
double time1,timeg;
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
@ -169,15 +96,15 @@ int main(int argc, char *argv[]){
|
||||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
}
|
}
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
begin();
|
||||||
#ifdef RETURN_BY_STACK
|
#ifdef RETURN_BY_STACK
|
||||||
DOT (&result , &m, x, &inc_x, y, &inc_y );
|
DOT (&result , &m, x, &inc_x, y, &inc_y );
|
||||||
#else
|
#else
|
||||||
result = DOT (&m, x, &inc_x, y, &inc_y );
|
result = DOT (&m, x, &inc_x, y, &inc_y );
|
||||||
#endif
|
#endif
|
||||||
gettimeofday( &stop, (struct timezone *)0);
|
end();
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = getsec();
|
||||||
|
|
||||||
timeg += time1;
|
timeg += time1;
|
||||||
|
|
||||||
|
|
1
cblas.h
1
cblas.h
|
@ -393,6 +393,7 @@ void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPE
|
||||||
void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout);
|
void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout);
|
||||||
/* dot production of BFLOAT16 input arrays, and output as float */
|
/* dot production of BFLOAT16 input arrays, and output as float */
|
||||||
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
|
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
|
||||||
|
void cblas_sbgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,6 +49,7 @@ if (DYNAMIC_ARCH)
|
||||||
|
|
||||||
if (POWER)
|
if (POWER)
|
||||||
set(DYNAMIC_CORE POWER6 POWER8 POWER9 POWER10)
|
set(DYNAMIC_CORE POWER6 POWER8 POWER9 POWER10)
|
||||||
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DHAVE_P10_SUPPORT")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (X86)
|
if (X86)
|
||||||
|
|
|
@ -96,7 +96,7 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CORE} STREQUAL "SKYLAKEX")
|
if (${CORE} STREQUAL SKYLAKEX)
|
||||||
if (NOT DYNAMIC_ARCH)
|
if (NOT DYNAMIC_ARCH)
|
||||||
if (NOT NO_AVX512)
|
if (NOT NO_AVX512)
|
||||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
|
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
|
||||||
|
@ -104,7 +104,7 @@ if (${CORE} STREQUAL "SKYLAKEX")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CORE} STREQUAL "COOPERLAKE")
|
if (${CORE} STREQUAL COOPERLAKE)
|
||||||
if (NOT DYNAMIC_ARCH)
|
if (NOT DYNAMIC_ARCH)
|
||||||
if (NOT NO_AVX512)
|
if (NOT NO_AVX512)
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||||
|
|
|
@ -184,8 +184,8 @@ macro(SetDefaultL2)
|
||||||
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
|
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
|
||||||
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
|
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
|
||||||
if (BUILD_BFLOAT16)
|
if (BUILD_BFLOAT16)
|
||||||
set(SBGEMVNKERNEL ../arm/gemv_n.c)
|
set(SBGEMVNKERNEL ../x86_64/sbgemv_n.c)
|
||||||
set(SBGEMVTKERNEL ../arm/gemv_t.c)
|
set(SBGEMVTKERNEL ../x86_64/sbgemv_t.c)
|
||||||
set(SHGERKERNEL ../generic/ger.c)
|
set(SHGERKERNEL ../generic/ger.c)
|
||||||
endif ()
|
endif ()
|
||||||
endmacro ()
|
endmacro ()
|
||||||
|
|
|
@ -84,6 +84,14 @@ if (X86)
|
||||||
set(NO_EXPRECISION 1)
|
set(NO_EXPRECISION 1)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
if (DYNAMIC_ARCH)
|
||||||
|
if (TARGET)
|
||||||
|
if (${TARGET} STREQUAL "GENERIC")
|
||||||
|
set(NO_EXPRECISION 1)
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
endif ()
|
||||||
|
|
||||||
if (UTEST_CHECK)
|
if (UTEST_CHECK)
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
|
||||||
set(SANITY_CHECK 1)
|
set(SANITY_CHECK 1)
|
||||||
|
|
|
@ -416,6 +416,29 @@ endif ()
|
||||||
set(ZGEMM_UNROLL_M 4)
|
set(ZGEMM_UNROLL_M 4)
|
||||||
set(ZGEMM_UNROLL_N 4)
|
set(ZGEMM_UNROLL_N 4)
|
||||||
set(SYMV_P 16)
|
set(SYMV_P 16)
|
||||||
|
elseif ("${TCORE}" STREQUAL "VORTEX")
|
||||||
|
file(APPEND ${TARGET_CONF_TEMP}
|
||||||
|
"#define ARMV8\n"
|
||||||
|
"#define L1_CODE_SIZE\t32768\n"
|
||||||
|
"#define L1_CODE_LINESIZE\t64\n"
|
||||||
|
"#define L1_CODE_ASSOCIATIVE\t4\n"
|
||||||
|
"#define L1_DATA_SIZE\t32768\n"
|
||||||
|
"#define L1_DATA_LINESIZE\t64\n"
|
||||||
|
"#define L1_DATA_ASSOCIATIVE\t4\n"
|
||||||
|
"#define L2_SIZE\t5262144\n"
|
||||||
|
"#define L2_LINESIZE\t64\n"
|
||||||
|
"#define L2_ASSOCIATIVE\t8\n"
|
||||||
|
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||||
|
"#define DTB_SIZE\t4096\n")
|
||||||
|
set(SGEMM_UNROLL_M 16)
|
||||||
|
set(SGEMM_UNROLL_N 4)
|
||||||
|
set(DGEMM_UNROLL_M 8)
|
||||||
|
set(DGEMM_UNROLL_N 4)
|
||||||
|
set(CGEMM_UNROLL_M 8)
|
||||||
|
set(CGEMM_UNROLL_N 4)
|
||||||
|
set(ZGEMM_UNROLL_M 4)
|
||||||
|
set(ZGEMM_UNROLL_N 4)
|
||||||
|
set(SYMV_P 16)
|
||||||
elseif ("${TCORE}" STREQUAL "POWER6")
|
elseif ("${TCORE}" STREQUAL "POWER6")
|
||||||
file(APPEND ${TARGET_CONF_TEMP}
|
file(APPEND ${TARGET_CONF_TEMP}
|
||||||
"#define L1_DATA_SIZE 32768\n"
|
"#define L1_DATA_SIZE 32768\n"
|
||||||
|
@ -533,6 +556,21 @@ else(NOT CMAKE_CROSSCOMPILING)
|
||||||
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
|
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
unset (HAVE_AVX2)
|
||||||
|
unset (HAVE_AVX)
|
||||||
|
unset (HAVE_FMA3)
|
||||||
|
unset (HAVE_MMX)
|
||||||
|
unset (HAVE_SSE)
|
||||||
|
unset (HAVE_SSE2)
|
||||||
|
unset (HAVE_SSE3)
|
||||||
|
unset (HAVE_SSSE3)
|
||||||
|
unset (HAVE_SSE4A)
|
||||||
|
unset (HAVE_SSE4_1)
|
||||||
|
unset (HAVE_SSE4_2)
|
||||||
|
unset (HAVE_NEON)
|
||||||
|
unset (HAVE_VFP)
|
||||||
|
unset (HAVE_VFPV3)
|
||||||
|
unset (HAVE_VFPV4)
|
||||||
message(STATUS "Running getarch")
|
message(STATUS "Running getarch")
|
||||||
|
|
||||||
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way
|
||||||
|
|
|
@ -44,50 +44,9 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (DEFINED TARGET)
|
|
||||||
if (${TARGET} STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
|
|
||||||
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
|
||||||
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
|
|
||||||
else()
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
|
||||||
endif()
|
|
||||||
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
|
||||||
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
|
||||||
# endif()
|
|
||||||
endif()
|
|
||||||
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
|
||||||
endif()
|
|
||||||
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
|
|
||||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
|
||||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
|
||||||
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
|
||||||
endif()
|
|
||||||
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
if (DEFINED HAVE_SSE)
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
|
|
||||||
endif()
|
|
||||||
if (DEFINED HAVE_SSE2)
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
|
|
||||||
endif()
|
|
||||||
if (DEFINED HAVE_SSE3)
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
|
|
||||||
endif()
|
|
||||||
if (DEFINED HAVE_SSSE3)
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
|
|
||||||
endif()
|
|
||||||
if (DEFINED HAVE_SSE4_1)
|
|
||||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (DEFINED TARGET)
|
if (DEFINED TARGET)
|
||||||
|
message(STATUS "-- -- -- -- -- -- -- -- -- -- -- -- --")
|
||||||
message(STATUS "Targeting the ${TARGET} architecture.")
|
message(STATUS "Targeting the ${TARGET} architecture.")
|
||||||
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
|
set(GETARCH_FLAGS "-DFORCE_${TARGET}")
|
||||||
endif ()
|
endif ()
|
||||||
|
@ -187,6 +146,63 @@ else()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
|
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
|
||||||
|
if (DEFINED TARGET)
|
||||||
|
if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512)
|
||||||
|
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||||
|
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||||
|
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
|
||||||
|
else()
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||||
|
endif()
|
||||||
|
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
||||||
|
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||||
|
# endif()
|
||||||
|
endif()
|
||||||
|
if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||||
|
endif()
|
||||||
|
if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2)
|
||||||
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||||
|
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||||
|
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||||
|
endif()
|
||||||
|
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_AVX)
|
||||||
|
if (NOT NO_AVX)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_AVX2)
|
||||||
|
if (NOT NO_AVX2)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_FMA3)
|
||||||
|
if (NOT NO_AVX2)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mfma")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_SSE)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_SSE2)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_SSE3)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_SSSE3)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
|
||||||
|
endif()
|
||||||
|
if (DEFINED HAVE_SSE4_1)
|
||||||
|
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
if (DEFINED BINARY)
|
if (DEFINED BINARY)
|
||||||
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
|
@ -142,14 +142,8 @@ REALNAME:
|
||||||
#define HUGE_PAGESIZE ( 4 << 20)
|
#define HUGE_PAGESIZE ( 4 << 20)
|
||||||
|
|
||||||
#ifndef BUFFERSIZE
|
#ifndef BUFFERSIZE
|
||||||
#if defined(CORTEXA57)
|
|
||||||
#define BUFFER_SIZE (20 << 20)
|
|
||||||
#elif defined(TSV110) || defined(EMAG8180)
|
|
||||||
#define BUFFER_SIZE (32 << 20)
|
#define BUFFER_SIZE (32 << 20)
|
||||||
#else
|
#else
|
||||||
#define BUFFER_SIZE (16 << 20)
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#define BUFFER_SIZE (32 << BUFFERSIZE)
|
#define BUFFER_SIZE (32 << BUFFERSIZE)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -250,6 +250,8 @@ void BLASFUNC(xgeru)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||||
void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||||
xdouble *, blasint *, xdouble *, blasint *);
|
xdouble *, blasint *, xdouble *, blasint *);
|
||||||
|
|
||||||
|
void BLASFUNC(sbgemv)(char *, blasint *, blasint *, float *, bfloat16 *, blasint *,
|
||||||
|
bfloat16 *, blasint *, float *, float *, blasint *);
|
||||||
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||||
float *, blasint *, float *, float *, blasint *);
|
float *, blasint *, float *, float *, blasint *);
|
||||||
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *,
|
||||||
|
|
|
@ -44,6 +44,10 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
int sbgemv_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
|
||||||
|
int sbgemv_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
|
||||||
|
int sbgemv_thread_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
|
||||||
|
int sbgemv_thread_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
|
||||||
int sger_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
int sger_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||||
int dger_k (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
int dger_k (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||||
int qger_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
int qger_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);
|
||||||
|
|
|
@ -646,10 +646,12 @@
|
||||||
|
|
||||||
#elif defined(BFLOAT16)
|
#elif defined(BFLOAT16)
|
||||||
|
|
||||||
#define D_TO_BF16_K SBDTOBF16_K
|
#define D_TO_BF16_K SBDTOBF16_K
|
||||||
#define D_BF16_TO_K DBF16TOD_K
|
#define D_BF16_TO_K DBF16TOD_K
|
||||||
#define S_TO_BF16_K SBSTOBF16_K
|
#define S_TO_BF16_K SBSTOBF16_K
|
||||||
#define S_BF16_TO_K SBF16TOS_K
|
#define S_BF16_TO_K SBF16TOS_K
|
||||||
|
#define SBGEMV_N SBGEMV_N_K
|
||||||
|
#define SBGEMV_T SBGEMV_T_K
|
||||||
|
|
||||||
#define AMAX_K SAMAX_K
|
#define AMAX_K SAMAX_K
|
||||||
#define AMIN_K SAMIN_K
|
#define AMIN_K SAMIN_K
|
||||||
|
|
|
@ -78,8 +78,8 @@ BLASLONG (*isbmin_k) (BLASLONG, float *, BLASLONG);
|
||||||
int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||||
int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||||
|
|
||||||
int (*sbgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
int (*sbgemv_n) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
|
||||||
int (*sbgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
int (*sbgemv_t) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
|
||||||
int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||||
|
|
||||||
int (*sbsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
int (*sbsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||||
|
|
|
@ -844,8 +844,8 @@ Lmcount$lazy_ptr:
|
||||||
#define BUFFER_SIZE ( 2 << 20)
|
#define BUFFER_SIZE ( 2 << 20)
|
||||||
#elif defined(PPC440FP2)
|
#elif defined(PPC440FP2)
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
#elif defined(POWER8) || defined(POWER9) || defined(POWER10)
|
#elif defined(POWER6) || defined(POWER8) || defined(POWER9) || defined(POWER10)
|
||||||
#define BUFFER_SIZE ( 64 << 20)
|
#define BUFFER_SIZE ( 64 << 22)
|
||||||
#else
|
#else
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -8,6 +8,8 @@
|
||||||
#define SBDTOBF16_K sbdtobf16_k
|
#define SBDTOBF16_K sbdtobf16_k
|
||||||
#define SBF16TOS_K sbf16tos_k
|
#define SBF16TOS_K sbf16tos_k
|
||||||
#define DBF16TOD_K dbf16tod_k
|
#define DBF16TOD_K dbf16tod_k
|
||||||
|
#define SBGEMV_N_K sbgemv_n
|
||||||
|
#define SBGEMV_T_K sbgemv_t
|
||||||
|
|
||||||
#define SBGEMM_ONCOPY sbgemm_oncopy
|
#define SBGEMM_ONCOPY sbgemm_oncopy
|
||||||
#define SBGEMM_OTCOPY sbgemm_otcopy
|
#define SBGEMM_OTCOPY sbgemm_otcopy
|
||||||
|
@ -29,6 +31,8 @@
|
||||||
#define SBDTOBF16_K gotoblas -> sbdtobf16_k
|
#define SBDTOBF16_K gotoblas -> sbdtobf16_k
|
||||||
#define SBF16TOS_K gotoblas -> sbf16tos_k
|
#define SBF16TOS_K gotoblas -> sbf16tos_k
|
||||||
#define DBF16TOD_K gotoblas -> dbf16tod_k
|
#define DBF16TOD_K gotoblas -> dbf16tod_k
|
||||||
|
#define SBGEMV_N_K gotoblas -> sbgemv_n
|
||||||
|
#define SBGEMV_T_K gotoblas -> sbgemv_t
|
||||||
|
|
||||||
#define SBGEMM_ONCOPY gotoblas -> sbgemm_oncopy
|
#define SBGEMM_ONCOPY gotoblas -> sbgemm_oncopy
|
||||||
#define SBGEMM_OTCOPY gotoblas -> sbgemm_otcopy
|
#define SBGEMM_OTCOPY gotoblas -> sbgemm_otcopy
|
||||||
|
|
|
@ -424,7 +424,7 @@ void get_cpuconfig(void)
|
||||||
sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0);
|
sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0);
|
||||||
printf("#define L1_DATA_SIZE %d \n",value);
|
printf("#define L1_DATA_SIZE %d \n",value);
|
||||||
sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0);
|
sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0);
|
||||||
printf("#define L2_DATA_SIZE %d \n",value);
|
printf("#define L2_SIZE %d \n",value);
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
16
cpuid_x86.c
16
cpuid_x86.c
|
@ -202,7 +202,7 @@ int support_avx(){
|
||||||
if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){
|
if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){
|
||||||
xgetbv(0, &eax, &edx);
|
xgetbv(0, &eax, &edx);
|
||||||
if((eax & 6) == 6){
|
if((eax & 6) == 6){
|
||||||
ret=1; //OS support AVX
|
ret=1; //OS supports saving xmm and ymm registers (6 = (1<<1) | (1<<2))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
|
@ -219,8 +219,8 @@ int support_avx2(){
|
||||||
if (!support_avx())
|
if (!support_avx())
|
||||||
return 0;
|
return 0;
|
||||||
cpuid(7, &eax, &ebx, &ecx, &edx);
|
cpuid(7, &eax, &ebx, &ecx, &edx);
|
||||||
if((ebx & (1<<7)) != 0)
|
if((ebx & (1<<5)) != 0)
|
||||||
ret=1; //OS supports AVX2
|
ret=1; //CPU supports AVX2
|
||||||
return ret;
|
return ret;
|
||||||
#else
|
#else
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -235,14 +235,14 @@ int support_avx512(){
|
||||||
if (!support_avx())
|
if (!support_avx())
|
||||||
return 0;
|
return 0;
|
||||||
cpuid(7, &eax, &ebx, &ecx, &edx);
|
cpuid(7, &eax, &ebx, &ecx, &edx);
|
||||||
if((ebx & 32) != 32){
|
if((ebx & (1<<5)) == 0){
|
||||||
ret=0; //OS does not even support AVX2
|
ret=0; //cpu does not have avx2 flag
|
||||||
}
|
}
|
||||||
if((ebx & (1<<31)) != 0){
|
if((ebx & (1<<31)) != 0){ //AVX512VL flag
|
||||||
xgetbv(0, &eax, &edx);
|
xgetbv(0, &eax, &edx);
|
||||||
if((eax & 0xe0) == 0xe0)
|
if((eax & 0xe0) == 0xe0)
|
||||||
ret=1; //OS supports AVX512VL
|
ret=1; //OS supports saving zmm registers
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
#else
|
#else
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -413,7 +413,13 @@ XBLASOBJS += \
|
||||||
xtbmv_thread_RUU.$(SUFFIX) xtbmv_thread_RUN.$(SUFFIX) \
|
xtbmv_thread_RUU.$(SUFFIX) xtbmv_thread_RUN.$(SUFFIX) \
|
||||||
xtbmv_thread_RLU.$(SUFFIX) xtbmv_thread_RLN.$(SUFFIX) \
|
xtbmv_thread_RLU.$(SUFFIX) xtbmv_thread_RLN.$(SUFFIX) \
|
||||||
xtbmv_thread_CUU.$(SUFFIX) xtbmv_thread_CUN.$(SUFFIX) \
|
xtbmv_thread_CUU.$(SUFFIX) xtbmv_thread_CUN.$(SUFFIX) \
|
||||||
xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX) \
|
xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX)
|
||||||
|
|
||||||
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
|
SBBLASOBJS += \
|
||||||
|
sbgemv_thread_n$(TSUFFIX).$(SUFFIX) \
|
||||||
|
sbgemv_thread_t$(TSUFFIX).$(SUFFIX)
|
||||||
|
endif
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -3693,4 +3699,12 @@ xtrsv_CUU.$(SUFFIX) xtrsv_CUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
|
||||||
xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
|
xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
|
||||||
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
|
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
|
||||||
|
|
||||||
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
|
sbgemv_thread_n.$(SUFFIX) sbgemv_thread_n.$(PSUFFIX) : sbgemv_thread.c ../../common.h
|
||||||
|
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
|
||||||
|
sbgemv_thread_t.$(SUFFIX) sbgemv_thread_t.$(PSUFFIX) : sbgemv_thread.c ../../common.h
|
||||||
|
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
include ../../Makefile.tail
|
include ../../Makefile.tail
|
||||||
|
|
|
@ -0,0 +1,149 @@
|
||||||
|
/*********************************************************************/
|
||||||
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* All rights reserved. */
|
||||||
|
/* */
|
||||||
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
/* without modification, are permitted provided that the following */
|
||||||
|
/* conditions are met: */
|
||||||
|
/* */
|
||||||
|
/* 1. Redistributions of source code must retain the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer. */
|
||||||
|
/* */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer in the documentation and/or other materials */
|
||||||
|
/* provided with the distribution. */
|
||||||
|
/* */
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||||
|
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||||
|
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||||
|
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||||
|
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||||
|
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||||
|
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||||
|
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||||
|
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||||
|
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/* */
|
||||||
|
/* The views and conclusions contained in the software and */
|
||||||
|
/* documentation are those of the authors and should not be */
|
||||||
|
/* interpreted as representing official policies, either expressed */
|
||||||
|
/* or implied, of The University of Texas at Austin. */
|
||||||
|
/*********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#ifndef TRANSA
|
||||||
|
#define SBGEMV SBGEMV_N
|
||||||
|
#else
|
||||||
|
#define SBGEMV SBGEMV_T
|
||||||
|
#endif
|
||||||
|
|
||||||
|
static int sbgemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *dummy2, BLASLONG dummy3){
|
||||||
|
|
||||||
|
bfloat16 *a, *x;
|
||||||
|
float *y;
|
||||||
|
BLASLONG lda, incx, incy;
|
||||||
|
BLASLONG m_from, m_to, n_from, n_to;
|
||||||
|
|
||||||
|
a = (bfloat16 *)args->a;
|
||||||
|
x = (bfloat16 *)args->b;
|
||||||
|
y = (float *)args->c;
|
||||||
|
|
||||||
|
lda = args->lda;
|
||||||
|
incx = args->ldb;
|
||||||
|
incy = args->ldc;
|
||||||
|
|
||||||
|
#ifndef TRANSA // N
|
||||||
|
m_from = *(range_m + 0);
|
||||||
|
m_to = *(range_m + 1);
|
||||||
|
n_from = 0;
|
||||||
|
n_to = args -> n;
|
||||||
|
a += m_from;
|
||||||
|
y += m_from * incy;
|
||||||
|
#else // T
|
||||||
|
m_from = 0;
|
||||||
|
m_to = args->m;
|
||||||
|
n_from = *(range_n + 0);
|
||||||
|
n_to = *(range_n + 1);
|
||||||
|
a += n_from * lda;
|
||||||
|
y += n_from * incy;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
SBGEMV(m_to - m_from, n_to - n_from, *((FLOAT *)(args->alpha)), a, lda, x, incx, *((FLOAT *)(args->beta)), y, incy);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int CNAME(BLASLONG m, BLASLONG n, float alpha, bfloat16 *a, BLASLONG lda, bfloat16 *x, BLASLONG incx, float beta, float *y, BLASLONG incy, int threads)
|
||||||
|
{
|
||||||
|
blas_arg_t args;
|
||||||
|
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||||
|
BLASLONG range[MAX_CPU_NUMBER + 1];
|
||||||
|
|
||||||
|
#ifndef TRANSA
|
||||||
|
BLASLONG width_for_split = m;
|
||||||
|
#else
|
||||||
|
BLASLONG width_for_split = n;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
BLASLONG BLOCK_WIDTH = width_for_split/threads;
|
||||||
|
|
||||||
|
int mode = BLAS_BFLOAT16 | BLAS_REAL;
|
||||||
|
|
||||||
|
args.m = m;
|
||||||
|
args.n = n;
|
||||||
|
args.a = (void *)a;
|
||||||
|
args.b = (void *)x;
|
||||||
|
args.c = (void *)y;
|
||||||
|
args.lda = lda;
|
||||||
|
args.ldb = incx;
|
||||||
|
args.ldc = incy;
|
||||||
|
args.alpha = (void *)&alpha;
|
||||||
|
args.beta = (void *)&beta;
|
||||||
|
|
||||||
|
range[0] = 0;
|
||||||
|
|
||||||
|
int thread_idx;
|
||||||
|
|
||||||
|
for (thread_idx=0; thread_idx<threads; thread_idx++) {
|
||||||
|
if (thread_idx != threads-1) {
|
||||||
|
range[thread_idx + 1] = range[thread_idx] + BLOCK_WIDTH;
|
||||||
|
} else {
|
||||||
|
range[thread_idx + 1] = range[thread_idx] + width_for_split;
|
||||||
|
}
|
||||||
|
|
||||||
|
queue[thread_idx].mode = mode;
|
||||||
|
queue[thread_idx].routine = sbgemv_kernel;
|
||||||
|
queue[thread_idx].args = &args;
|
||||||
|
#ifndef TRANSA
|
||||||
|
queue[thread_idx].range_m = &range[thread_idx];
|
||||||
|
queue[thread_idx].range_n = NULL;
|
||||||
|
#else
|
||||||
|
queue[thread_idx].range_m = NULL;
|
||||||
|
queue[thread_idx].range_n = &range[thread_idx];
|
||||||
|
#endif
|
||||||
|
queue[thread_idx].sa = NULL;
|
||||||
|
queue[thread_idx].sb = NULL;
|
||||||
|
queue[thread_idx].next = &queue[thread_idx + 1];
|
||||||
|
|
||||||
|
width_for_split -= BLOCK_WIDTH;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (thread_idx) {
|
||||||
|
queue[0].sa = NULL;
|
||||||
|
queue[0].sb = NULL;
|
||||||
|
queue[thread_idx - 1].next = NULL;
|
||||||
|
|
||||||
|
exec_blas(thread_idx, queue);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -80,7 +80,7 @@ int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
mode |= BLAS_LEGACY;
|
if(!(mode & BLAS_PTHREAD)) mode |= BLAS_LEGACY;
|
||||||
|
|
||||||
for (i = 0; i < nthreads; i++) blas_queue_init(&queue[i]);
|
for (i = 0; i < nthreads; i++) blas_queue_init(&queue[i]);
|
||||||
|
|
||||||
|
|
|
@ -352,7 +352,6 @@ fprintf(stderr,"UNHANDLED COMPLEX\n");
|
||||||
/* Other types in future */
|
/* Other types in future */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!sb) fprintf(stderr,"SB not declared!!!\n");
|
|
||||||
queue->sb=sb;
|
queue->sb=sb;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -476,12 +476,15 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
|
||||||
|
|
||||||
routine = queue -> routine;
|
routine = queue -> routine;
|
||||||
|
|
||||||
if (!(queue -> mode & BLAS_LEGACY)) {
|
if (queue -> mode & BLAS_LEGACY) {
|
||||||
|
legacy_exec(routine, queue -> mode, queue -> args, queue -> sb);
|
||||||
|
} else
|
||||||
|
if (queue -> mode & BLAS_PTHREAD) {
|
||||||
|
void (*pthreadcompat)(void *) = queue -> routine;
|
||||||
|
(pthreadcompat)(queue -> args);
|
||||||
|
} else
|
||||||
(routine)(queue -> args, queue -> range_m, queue -> range_n,
|
(routine)(queue -> args, queue -> range_m, queue -> range_n,
|
||||||
queue -> sa, queue -> sb, 0);
|
queue -> sa, queue -> sb, 0);
|
||||||
} else {
|
|
||||||
legacy_exec(routine, queue -> mode, queue -> args, queue -> sb);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((num > 1) && queue -> next) exec_blas_async_wait(num - 1, queue -> next);
|
if ((num > 1) && queue -> next) exec_blas_async_wait(num - 1, queue -> next);
|
||||||
|
|
||||||
|
|
|
@ -330,8 +330,8 @@ int support_avx2(){
|
||||||
if (!support_avx())
|
if (!support_avx())
|
||||||
return 0;
|
return 0;
|
||||||
cpuid(7, &eax, &ebx, &ecx, &edx);
|
cpuid(7, &eax, &ebx, &ecx, &edx);
|
||||||
if((ebx & (1<<7)) != 0)
|
if((ebx & (1<<5)) != 0)
|
||||||
ret=1; //OS supports AVX2
|
ret=1; //AVX2 flag is set
|
||||||
return ret;
|
return ret;
|
||||||
#else
|
#else
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -346,13 +346,13 @@ int support_avx512(){
|
||||||
if (!support_avx())
|
if (!support_avx())
|
||||||
return 0;
|
return 0;
|
||||||
cpuid(7, &eax, &ebx, &ecx, &edx);
|
cpuid(7, &eax, &ebx, &ecx, &edx);
|
||||||
if((ebx & (1<<7)) == 0){
|
if((ebx & (1<<5)) == 0){
|
||||||
ret=0; //OS does not even support AVX2
|
ret=0; //cpu does not have avx2 flag
|
||||||
}
|
}
|
||||||
if((ebx & (1u<<31)) != 0){
|
if((ebx & (1<<31)) != 0){ //AVX512VL flag is set
|
||||||
xgetbv(0, &eax, &edx);
|
xgetbv(0, &eax, &edx);
|
||||||
if((eax & 0xe0) == 0xe0)
|
if((eax & 0xe0) == 0xe0)
|
||||||
ret=1; //OS supports AVX512VL
|
ret=1; //OS supports saving zmm register
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -139,19 +139,30 @@ static gotoblas_t *force_coretype(char *coretype) {
|
||||||
|
|
||||||
static gotoblas_t *get_coretype(void) {
|
static gotoblas_t *get_coretype(void) {
|
||||||
int implementer, variant, part, arch, revision, midr_el1;
|
int implementer, variant, part, arch, revision, midr_el1;
|
||||||
|
char coremsg[128];
|
||||||
|
|
||||||
|
#if (!defined OS_LINUX && !defined OS_ANDROID)
|
||||||
|
return NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
#if (defined OS_LINUX || defined OS_ANDROID)
|
|
||||||
if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
|
if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
|
||||||
char coremsg[128];
|
#ifdef __linux
|
||||||
|
FILE *infile;
|
||||||
|
char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL;
|
||||||
|
p = (char *) NULL ;
|
||||||
|
infile = fopen("/sys/devices/system/cpu/cpu0/regs/identification/midr_el1","r");
|
||||||
|
if (!infile) return NULL;
|
||||||
|
fgets(buffer, sizeof(buffer), infile);
|
||||||
|
midr_el1=strtoul(buffer,NULL,16);
|
||||||
|
fclose(infile);
|
||||||
|
#else
|
||||||
snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n");
|
snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n");
|
||||||
openblas_warning(1, coremsg);
|
openblas_warning(1, coremsg);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
|
||||||
#else
|
|
||||||
return NULL;
|
|
||||||
#endif
|
#endif
|
||||||
|
} else {
|
||||||
get_cpu_ftr(MIDR_EL1, midr_el1);
|
get_cpu_ftr(MIDR_EL1, midr_el1);
|
||||||
|
}
|
||||||
/*
|
/*
|
||||||
* MIDR_EL1
|
* MIDR_EL1
|
||||||
*
|
*
|
||||||
|
@ -219,6 +230,9 @@ static gotoblas_t *get_coretype(void) {
|
||||||
return &gotoblas_FALKOR;
|
return &gotoblas_FALKOR;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
snprintf(coremsg, 128, "Unknown CPU model - implementer %x part %x\n",implementer,part);
|
||||||
|
openblas_warning(1, coremsg);
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,10 +6,10 @@ extern gotoblas_t gotoblas_POWER8;
|
||||||
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
#if (!defined __GNUC__) || ( __GNUC__ >= 6)
|
||||||
extern gotoblas_t gotoblas_POWER9;
|
extern gotoblas_t gotoblas_POWER9;
|
||||||
#endif
|
#endif
|
||||||
#if (!defined __GNUC__) || ( __GNUC__ >= 11) \
|
//#if (!defined __GNUC__) || ( __GNUC__ >= 11) \
|
||||||
|| (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
|
// || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
|
||||||
#define HAVE_P10_SUPPORT 1
|
//#define HAVE_P10_SUPPORT 1
|
||||||
#endif
|
//#endif
|
||||||
#ifdef HAVE_P10_SUPPORT
|
#ifdef HAVE_P10_SUPPORT
|
||||||
extern gotoblas_t gotoblas_POWER10;
|
extern gotoblas_t gotoblas_POWER10;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1767,11 +1767,11 @@ int get_num_procs(void);
|
||||||
int get_num_procs(void) {
|
int get_num_procs(void) {
|
||||||
|
|
||||||
static int nums = 0;
|
static int nums = 0;
|
||||||
|
|
||||||
|
#if defined(__GLIBC_PREREQ)
|
||||||
cpu_set_t cpuset,*cpusetp;
|
cpu_set_t cpuset,*cpusetp;
|
||||||
size_t size;
|
size_t size;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
#if defined(__GLIBC_PREREQ)
|
|
||||||
#if !__GLIBC_PREREQ(2, 7)
|
#if !__GLIBC_PREREQ(2, 7)
|
||||||
int i;
|
int i;
|
||||||
#if !__GLIBC_PREREQ(2, 6)
|
#if !__GLIBC_PREREQ(2, 6)
|
||||||
|
|
|
@ -120,10 +120,10 @@ dll : ../$(LIBDLLNAME)
|
||||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)
|
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)
|
||||||
|
|
||||||
$(LIBPREFIX).def : gensymbol
|
$(LIBPREFIX).def : gensymbol
|
||||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||||
|
|
||||||
libgoto_hpl.def : gensymbol
|
libgoto_hpl.def : gensymbol
|
||||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||||
|
|
||||||
ifeq ($(OSNAME), Darwin)
|
ifeq ($(OSNAME), Darwin)
|
||||||
INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
||||||
|
@ -258,16 +258,16 @@ static : ../$(LIBNAME)
|
||||||
rm -f goto.$(SUFFIX)
|
rm -f goto.$(SUFFIX)
|
||||||
|
|
||||||
osx.def : gensymbol ../Makefile.system ../getarch.c
|
osx.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||||
|
|
||||||
aix.def : gensymbol ../Makefile.system ../getarch.c
|
aix.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||||
|
|
||||||
objcopy.def : gensymbol ../Makefile.system ../getarch.c
|
objcopy.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||||
|
|
||||||
objconv.def : gensymbol ../Makefile.system ../getarch.c
|
objconv.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||||
|
|
||||||
test : linktest.c
|
test : linktest.c
|
||||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
||||||
|
|
|
@ -50,8 +50,8 @@
|
||||||
zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2,
|
zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2,
|
||||||
zgeadd, dzsum);
|
zgeadd, dzsum);
|
||||||
|
|
||||||
@cblasobjs = (lsame, xerbla);
|
@blasobjs = (lsame, xerbla);
|
||||||
@halfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
|
@bfblasobjs = (sbgemm, sbgemv, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
|
||||||
@cblasobjsc = (
|
@cblasobjsc = (
|
||||||
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
|
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
|
||||||
cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k,
|
cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k,
|
||||||
|
@ -72,7 +72,7 @@
|
||||||
);
|
);
|
||||||
|
|
||||||
@cblasobjss = (
|
@cblasobjss = (
|
||||||
cblas_sasum, cblas_saxpy,
|
cblas_sasum, cblas_saxpy, cblas_saxpby,
|
||||||
cblas_scopy, cblas_sdot, cblas_sdsdot, cblas_sgbmv, cblas_sgemm,
|
cblas_scopy, cblas_sdot, cblas_sdsdot, cblas_sgbmv, cblas_sgemm,
|
||||||
cblas_sgemv, cblas_sger, cblas_snrm2, cblas_srot, cblas_srotg,
|
cblas_sgemv, cblas_sger, cblas_snrm2, cblas_srot, cblas_srotg,
|
||||||
cblas_srotm, cblas_srotmg, cblas_ssbmv, cblas_sscal, cblas_sspmv, cblas_sspr2, cblas_sspr,
|
cblas_srotm, cblas_srotmg, cblas_ssbmv, cblas_sscal, cblas_sspmv, cblas_sspr2, cblas_sspr,
|
||||||
|
@ -94,7 +94,7 @@
|
||||||
|
|
||||||
@cblasobjs = ( cblas_xerbla );
|
@cblasobjs = ( cblas_xerbla );
|
||||||
|
|
||||||
@halfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod);
|
@bfcblasobjs = (cblas_sbgemm, cblas_sbgemv, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod);
|
||||||
|
|
||||||
@exblasobjs = (
|
@exblasobjs = (
|
||||||
qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm,
|
qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm,
|
||||||
|
@ -415,7 +415,7 @@ zpotri,
|
||||||
cgeqrt, cgeqrt2, cgeqrt3, cgemqrt,
|
cgeqrt, cgeqrt2, cgeqrt3, cgemqrt,
|
||||||
ctpqrt, ctpqrt2, ctpmqrt, ctprfb,
|
ctpqrt, ctpqrt2, ctpmqrt, ctprfb,
|
||||||
);
|
);
|
||||||
@lapack2objszc = (
|
@lapackobjs2zc = (
|
||||||
# ZCLASRC -- Double-single mixed precision complex routines called from
|
# ZCLASRC -- Double-single mixed precision complex routines called from
|
||||||
# single, single-extra and double precision complex LAPACK
|
# single, single-extra and double precision complex LAPACK
|
||||||
# routines (i.e. from CLASRC, CXLASRC, ZLASRC).
|
# routines (i.e. from CLASRC, CXLASRC, ZLASRC).
|
||||||
|
@ -425,7 +425,7 @@ zpotri,
|
||||||
cpotrs,
|
cpotrs,
|
||||||
);
|
);
|
||||||
|
|
||||||
@lapack2objsd = (
|
@lapackobjs2d = (
|
||||||
# DLASRC -- Double precision real LAPACK routines
|
# DLASRC -- Double precision real LAPACK routines
|
||||||
# already provided by @lapackobjs:
|
# already provided by @lapackobjs:
|
||||||
# dgesv, dgetf2, dgetrs, dlaswp, dlauu2, dlauum, dpotf2, dpotrf, dpotri,
|
# dgesv, dgetf2, dgetrs, dlaswp, dlauu2, dlauum, dpotf2, dpotrf, dpotri,
|
||||||
|
@ -568,7 +568,7 @@ zpotri,
|
||||||
);
|
);
|
||||||
# functions added for lapack-3.6.0
|
# functions added for lapack-3.6.0
|
||||||
|
|
||||||
@lapack2objsc = ( @lapack2objsc,
|
@lapackobjs2c = ( @lapackobjs2c,
|
||||||
cgejsv,
|
cgejsv,
|
||||||
cgesvdx,
|
cgesvdx,
|
||||||
cgesvj,
|
cgesvj,
|
||||||
|
@ -604,7 +604,7 @@ zpotri,
|
||||||
csyr2,
|
csyr2,
|
||||||
cunm22,
|
cunm22,
|
||||||
);
|
);
|
||||||
@lapackobjs2d = (@lapack2objsd,
|
@lapackobjs2d = (@lapackobjs2d,
|
||||||
dbdsvdx,
|
dbdsvdx,
|
||||||
dgesvdx,
|
dgesvdx,
|
||||||
dgetrf2,
|
dgetrf2,
|
||||||
|
@ -637,7 +637,7 @@ zpotri,
|
||||||
dpotrf2,
|
dpotrf2,
|
||||||
dsecnd,
|
dsecnd,
|
||||||
);
|
);
|
||||||
@lapack2objss = (@lapack2objss,
|
@lapackobjs2s = (@lapackobjs2s,
|
||||||
sbdsvdx,
|
sbdsvdx,
|
||||||
second,
|
second,
|
||||||
sgesvdx,
|
sgesvdx,
|
||||||
|
@ -670,7 +670,7 @@ zpotri,
|
||||||
sorm22,
|
sorm22,
|
||||||
spotrf2,
|
spotrf2,
|
||||||
);
|
);
|
||||||
@lapack2objsz = (@lapack2objsz,
|
@lapackobjs2z = (@lapackobjs2z,
|
||||||
zgejsv,
|
zgejsv,
|
||||||
zgesvdx,
|
zgesvdx,
|
||||||
zgesvj,
|
zgesvj,
|
||||||
|
@ -707,7 +707,7 @@ zpotri,
|
||||||
zunm22,
|
zunm22,
|
||||||
);
|
);
|
||||||
# functions added for lapack-3.7.0
|
# functions added for lapack-3.7.0
|
||||||
@lapack2objss = (@lapack2objss,
|
@lapackobjs2s = (@lapackobjs2s,
|
||||||
slarfy,
|
slarfy,
|
||||||
strevc3,
|
strevc3,
|
||||||
sgelqt,
|
sgelqt,
|
||||||
|
@ -726,7 +726,7 @@ zpotri,
|
||||||
stplqt2,
|
stplqt2,
|
||||||
stpmlqt,
|
stpmlqt,
|
||||||
);
|
);
|
||||||
@lapack2objsd = (@lapack2objsd,
|
@lapackobjs2d = (@lapackobjs2d,
|
||||||
dlarfy,
|
dlarfy,
|
||||||
dsyconvf,
|
dsyconvf,
|
||||||
dtrevc3,
|
dtrevc3,
|
||||||
|
@ -746,7 +746,7 @@ zpotri,
|
||||||
dtplqt2,
|
dtplqt2,
|
||||||
dtpmlqt,
|
dtpmlqt,
|
||||||
);
|
);
|
||||||
@lapack2objsc = (@lapack2objsc,
|
@lapackobjs2c = (@lapackobjs2c,
|
||||||
clarfy,
|
clarfy,
|
||||||
csyconvf,
|
csyconvf,
|
||||||
ctrevc3,
|
ctrevc3,
|
||||||
|
@ -766,7 +766,7 @@ zpotri,
|
||||||
ctplqt2,
|
ctplqt2,
|
||||||
ctpmlqt,
|
ctpmlqt,
|
||||||
);
|
);
|
||||||
@lapack2objsz = (@lapack2objsz,
|
@lapackobjs2z = (@lapackobjs2z,
|
||||||
zlarfy,
|
zlarfy,
|
||||||
zsyconvf,
|
zsyconvf,
|
||||||
ztrevc3,
|
ztrevc3,
|
||||||
|
@ -786,31 +786,31 @@ zpotri,
|
||||||
zlamswlq,
|
zlamswlq,
|
||||||
zgemlq,
|
zgemlq,
|
||||||
);
|
);
|
||||||
@lapack2objs = (@lapack2objs,
|
@lapackobjs2s = (@lapackobjs2s,
|
||||||
sladiv1,
|
sladiv1);
|
||||||
dladiv1,
|
@lapackobjs2d = (@lapackobjs2d,
|
||||||
|
dladiv1);
|
||||||
|
@lapackobjs = (@lapackobjs,
|
||||||
iparam2stage,
|
iparam2stage,
|
||||||
|
|
||||||
# functions added for lapack-3.8.0
|
# functions added for lapack-3.8.0
|
||||||
|
|
||||||
ilaenv2stage,
|
ilaenv2stage,
|
||||||
);
|
);
|
||||||
# functions added for lapack-3.9.0
|
# functions added for lapack-3.9.0
|
||||||
@lapack2objsc = (@lapack2objsc,
|
@lapackobjs2c = (@lapackobjs2c,
|
||||||
cgesvdq,
|
cgesvdq,
|
||||||
cungtsqr,
|
cungtsqr
|
||||||
dcombssq,
|
|
||||||
);
|
);
|
||||||
@lapack2objsd = (@lapack2objsd,
|
@lapackobjs2d = (@lapackobjs2d,
|
||||||
|
dcombssq,
|
||||||
dgesvdq,
|
dgesvdq,
|
||||||
dorgtsqr,
|
dorgtsqr,
|
||||||
);
|
);
|
||||||
@lapack2objss = (@lapack2objss,
|
@lapackobjs2s = (@lapackobjs2s,
|
||||||
scombssq,
|
scombssq,
|
||||||
sgesvdq,
|
sgesvdq,
|
||||||
sorgtsqr,
|
sorgtsqr,
|
||||||
);
|
);
|
||||||
@lapack2objsz = (@lapack2objsz,
|
@lapackobjs2z = (@lapackobjs2z,
|
||||||
zgesvdq,
|
zgesvdq,
|
||||||
zungtsqr
|
zungtsqr
|
||||||
);
|
);
|
||||||
|
@ -835,10 +835,29 @@ zpotri,
|
||||||
dlatzm, dtzrqf);
|
dlatzm, dtzrqf);
|
||||||
|
|
||||||
@lapack_deprecated_objss = (
|
@lapack_deprecated_objss = (
|
||||||
|
sgelsx,
|
||||||
sgegs,
|
sgegs,
|
||||||
sgegv,
|
sgegv,
|
||||||
|
sgeqpf,
|
||||||
|
sggsvd,
|
||||||
|
sggsvp,
|
||||||
|
slahrd,
|
||||||
|
slatzm,
|
||||||
|
stzrqf
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@lapack_deprecated_objsz = (
|
||||||
|
zgegs,
|
||||||
|
zgegv,
|
||||||
|
zgelsx,
|
||||||
|
zgeqpf,
|
||||||
|
zggsvd,
|
||||||
|
zggsvp,
|
||||||
|
zlahrd,
|
||||||
|
zlatzm,
|
||||||
|
ztzrqf
|
||||||
|
);
|
||||||
|
|
||||||
@lapacke_deprecated_objsc = (
|
@lapacke_deprecated_objsc = (
|
||||||
LAPACKE_cggsvp,
|
LAPACKE_cggsvp,
|
||||||
LAPACKE_cggsvp_work,
|
LAPACKE_cggsvp_work,
|
||||||
|
@ -3590,14 +3609,18 @@ use File::Basename;
|
||||||
my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib");
|
my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib");
|
||||||
|
|
||||||
if ($ARGV[12] == 1) {
|
if ($ARGV[12] == 1) {
|
||||||
@blasobjs = (@blasobjs, @halfblasobjs);
|
@blasobjs = (@blasobjs, @bfblasobjs);
|
||||||
@cblasobjs = (@cblasobjs, @halfcblasobjs);
|
@cblasobjs = (@cblasobjs, @bfcblasobjs);
|
||||||
}
|
}
|
||||||
if ($ARGV[13] == 1) {
|
if ($ARGV[13] == 1) {
|
||||||
@blasobjs = (@blasobjs, @blasobjss);
|
@blasobjs = (@blasobjs, @blasobjss);
|
||||||
@cblasobjs = (@cblasobjs, @cblasobjss);
|
@cblasobjs = (@cblasobjs, @cblasobjss);
|
||||||
@lapackobjs = (@lapackobjs, @lapackobjss);
|
@lapackobjs = (@lapackobjs, @lapackobjss);
|
||||||
@lapack2objs = (@lapack2objs, @lapack2objss);
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2s);
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2sc);
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2ds);
|
||||||
|
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objss);
|
||||||
|
@lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objss);
|
||||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s);
|
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s);
|
||||||
@lapackeobjs = (@lapackeobjs, @lapackeobjss);
|
@lapackeobjs = (@lapackeobjs, @lapackeobjss);
|
||||||
}
|
}
|
||||||
|
@ -3605,7 +3628,12 @@ if ($ARGV[14] == 1) {
|
||||||
@blasobjs = (@blasobjs, @blasobjsd);
|
@blasobjs = (@blasobjs, @blasobjsd);
|
||||||
@cblasobjs = (@cblasobjs, @cblasobjsd);
|
@cblasobjs = (@cblasobjs, @cblasobjsd);
|
||||||
@lapackobjs = (@lapackobjs, @lapackobjsd);
|
@lapackobjs = (@lapackobjs, @lapackobjsd);
|
||||||
@lapack2objs = (@lapack2objs, @lapack2objsd);
|
if ($ARGV[13] == 0) {
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2ds);
|
||||||
|
}
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2d, @lapackobjs2dz);
|
||||||
|
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsd);
|
||||||
|
@lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objsd);
|
||||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d);
|
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d);
|
||||||
@lapackeobjs = (@lapackeobjs, @lapackeobjsd);
|
@lapackeobjs = (@lapackeobjs, @lapackeobjsd);
|
||||||
}
|
}
|
||||||
|
@ -3613,9 +3641,14 @@ if ($ARGV[15] == 1) {
|
||||||
@blasobjs = (@blasobjs, @blasobjsc);
|
@blasobjs = (@blasobjs, @blasobjsc);
|
||||||
@cblasobjs = (@cblasobjs, @cblasobjsc);
|
@cblasobjs = (@cblasobjs, @cblasobjsc);
|
||||||
@gemm3mobjs = (@gemm3mobjs, @gemm3mobjsc);
|
@gemm3mobjs = (@gemm3mobjs, @gemm3mobjsc);
|
||||||
@cblasgemm3mobjs = (@cblasgemm3mobjs, @sblasgemm3mobjsc);
|
@cblasgemm3mobjs = (@cblasgemm3mobjs, @cblasgemm3mobjsc);
|
||||||
@lapackobjs = (@lapackobjs, @lapackobjsc);
|
@lapackobjs = (@lapackobjs, @lapackobjsc);
|
||||||
@lapack2objs = (@lapack2objs, @lapack2objsc, @lapac2objszc);
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2c, @lapackobjs2zc);
|
||||||
|
if ($ARGV[13] == 0) {
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2sc);
|
||||||
|
}
|
||||||
|
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsc);
|
||||||
|
@lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objsc);
|
||||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c);
|
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c);
|
||||||
@lapackeobjs = (@lapackeobjs, @lapackeobjsc);
|
@lapackeobjs = (@lapackeobjs, @lapackeobjsc);
|
||||||
}
|
}
|
||||||
|
@ -3623,9 +3656,17 @@ if ($ARGV[16] == 1) {
|
||||||
@blasobjs = (@blasobjs, @blasobjsz);
|
@blasobjs = (@blasobjs, @blasobjsz);
|
||||||
@cblasobjs = (@cblasobjs, @cblasobjsz);
|
@cblasobjs = (@cblasobjs, @cblasobjsz);
|
||||||
@gemm3mobjs = (@gemm3mobjs, @gemm3mobjsz);
|
@gemm3mobjs = (@gemm3mobjs, @gemm3mobjsz);
|
||||||
@cblasgemm3mobjs = (@cblasgemm3mobjs, @sblasgemm3mobjsz);
|
@cblasgemm3mobjs = (@cblasgemm3mobjs, @cblasgemm3mobjsz);
|
||||||
@lapackobjs = (@lapackobjs, @lapackobjsz);
|
@lapackobjs = (@lapackobjs, @lapackobjsz);
|
||||||
@lapack2objs = (@lapack2objs, @lapack2objsz, @lapack2objszc);
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2z);
|
||||||
|
if ($ARGV[15] == 0) {
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2zc);
|
||||||
|
}
|
||||||
|
if ($ARGV[14] == 0) {
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2dz);
|
||||||
|
}
|
||||||
|
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsz);
|
||||||
|
@lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objsz);
|
||||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z);
|
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z);
|
||||||
@lapackeobjs = (@lapackeobjs, @lapackeobjsz);
|
@lapackeobjs = (@lapackeobjs, @lapackeobjsz);
|
||||||
}
|
}
|
||||||
|
|
2
f_check
2
f_check
|
@ -33,7 +33,7 @@ if ($compiler eq "") {
|
||||||
"ppuf77", "ppuf95", "ppuf90", "ppuxlf",
|
"ppuf77", "ppuf95", "ppuf90", "ppuxlf",
|
||||||
"pathf90", "pathf95",
|
"pathf90", "pathf95",
|
||||||
"pgf95", "pgf90", "pgf77",
|
"pgf95", "pgf90", "pgf77",
|
||||||
"flang",
|
"flang", "egfortran",
|
||||||
"ifort");
|
"ifort");
|
||||||
|
|
||||||
OUTER:
|
OUTER:
|
||||||
|
|
61
getarch.c
61
getarch.c
|
@ -330,7 +330,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
||||||
"-DFMA3"
|
"-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
|
||||||
#define LIBNAME "haswell"
|
#define LIBNAME "haswell"
|
||||||
#define CORENAME "HASWELL"
|
#define CORENAME "HASWELL"
|
||||||
#endif
|
#endif
|
||||||
|
@ -346,7 +346,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
||||||
"-DFMA3"
|
"-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
|
||||||
#define LIBNAME "haswell"
|
#define LIBNAME "haswell"
|
||||||
#define CORENAME "HASWELL"
|
#define CORENAME "HASWELL"
|
||||||
#else
|
#else
|
||||||
|
@ -359,7 +359,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
||||||
"-DFMA3 -DHAVE_AVX512VL -march=skylake-avx512"
|
"-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3 -DHAVE_AVX512VL -march=skylake-avx512"
|
||||||
#define LIBNAME "skylakex"
|
#define LIBNAME "skylakex"
|
||||||
#define CORENAME "SKYLAKEX"
|
#define CORENAME "SKYLAKEX"
|
||||||
#endif
|
#endif
|
||||||
|
@ -376,7 +376,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
||||||
"-DFMA3"
|
"-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
|
||||||
#define LIBNAME "haswell"
|
#define LIBNAME "haswell"
|
||||||
#define CORENAME "HASWELL"
|
#define CORENAME "HASWELL"
|
||||||
#else
|
#else
|
||||||
|
@ -389,7 +389,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
||||||
"-DFMA3 -DHAVE_AVX512VL -DHAVE_AVX512BF16 -march=cooperlake"
|
"-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3 -DHAVE_AVX512VL -DHAVE_AVX512BF16 -march=cooperlake"
|
||||||
#define LIBNAME "cooperlake"
|
#define LIBNAME "cooperlake"
|
||||||
#define CORENAME "COOPERLAKE"
|
#define CORENAME "COOPERLAKE"
|
||||||
#endif
|
#endif
|
||||||
|
@ -559,7 +559,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||||
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
|
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
|
||||||
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
|
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
|
||||||
"-DHAVE_AVX -DHAVE_FMA3 -DFMA3"
|
"-DHAVE_AVX -DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
|
||||||
#define LIBNAME "zen"
|
#define LIBNAME "zen"
|
||||||
#define CORENAME "ZEN"
|
#define CORENAME "ZEN"
|
||||||
#endif
|
#endif
|
||||||
|
@ -1236,6 +1236,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#else
|
#else
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_VORTEX
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "ARM64"
|
||||||
|
#define SUBARCHITECTURE "VORTEX"
|
||||||
|
#define SUBDIRNAME "arm64"
|
||||||
|
#define ARCHCONFIG "-DVORTEX " \
|
||||||
|
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||||
|
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||||
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
|
#define LIBNAME "vortex"
|
||||||
|
#define CORENAME "VORTEX"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_ZARCH_GENERIC
|
#ifdef FORCE_ZARCH_GENERIC
|
||||||
#define FORCE
|
#define FORCE
|
||||||
#define ARCHITECTURE "ZARCH"
|
#define ARCHITECTURE "ZARCH"
|
||||||
|
@ -1409,8 +1423,41 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
printf("NUM_CORES=%d\n", get_num_cores());
|
printf("NUM_CORES=%d\n", get_num_cores());
|
||||||
|
|
||||||
#if defined(__arm__) && !defined(FORCE)
|
#if defined(__arm__)
|
||||||
|
#if !defined(FORCE)
|
||||||
|
fprintf(stderr,"get features!\n");
|
||||||
get_features();
|
get_features();
|
||||||
|
#else
|
||||||
|
fprintf(stderr,"split archconfig!\n");
|
||||||
|
sprintf(buffer, "%s", ARCHCONFIG);
|
||||||
|
|
||||||
|
p = &buffer[0];
|
||||||
|
|
||||||
|
while (*p) {
|
||||||
|
if ((*p == '-') && (*(p + 1) == 'D')) {
|
||||||
|
p += 2;
|
||||||
|
if (*p != 'H') {
|
||||||
|
while( (*p != ' ') && (*p != '-') && (*p != '\0') && (*p != '\n')) {p++; }
|
||||||
|
if (*p == '-') continue;
|
||||||
|
}
|
||||||
|
while ((*p != ' ') && (*p != '\0')) {
|
||||||
|
|
||||||
|
if (*p == '=') {
|
||||||
|
printf("=");
|
||||||
|
p ++;
|
||||||
|
while ((*p != ' ') && (*p != '\0')) {
|
||||||
|
printf("%c", *p);
|
||||||
|
p ++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
printf("%c", *p);
|
||||||
|
p ++;
|
||||||
|
if ((*p == ' ') || (*p =='\0')) printf("=1\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else p ++;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -48,6 +48,7 @@ SBLAS3OBJS = \
|
||||||
|
|
||||||
ifeq ($(BUILD_BFLOAT16),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
SBBLAS1OBJS = sbdot.$(SUFFIX)
|
SBBLAS1OBJS = sbdot.$(SUFFIX)
|
||||||
|
SBBLAS2OBJS = sbgemv.$(SUFFIX)
|
||||||
SBBLAS3OBJS = sbgemm.$(SUFFIX)
|
SBBLAS3OBJS = sbgemm.$(SUFFIX)
|
||||||
SBEXTOBJS = sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX)
|
SBEXTOBJS = sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
@ -284,6 +285,7 @@ CSBLAS3OBJS = \
|
||||||
|
|
||||||
ifeq ($(BUILD_BFLOAT16),1)
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX)
|
CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX)
|
||||||
|
CSBBLAS2OBJS = cblas_sbgemv.$(SUFFIX)
|
||||||
CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX)
|
CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX)
|
||||||
CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX)
|
CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
@ -382,6 +384,7 @@ SBLAS1OBJS += $(CSBLAS1OBJS)
|
||||||
SBLAS2OBJS += $(CSBLAS2OBJS)
|
SBLAS2OBJS += $(CSBLAS2OBJS)
|
||||||
SBLAS3OBJS += $(CSBLAS3OBJS)
|
SBLAS3OBJS += $(CSBLAS3OBJS)
|
||||||
SBBLAS1OBJS += $(CSBBLAS1OBJS)
|
SBBLAS1OBJS += $(CSBBLAS1OBJS)
|
||||||
|
SBBLAS2OBJS += $(CSBBLAS2OBJS)
|
||||||
SBBLAS3OBJS += $(CSBBLAS3OBJS)
|
SBBLAS3OBJS += $(CSBBLAS3OBJS)
|
||||||
DBLAS1OBJS += $(CDBLAS1OBJS)
|
DBLAS1OBJS += $(CDBLAS1OBJS)
|
||||||
DBLAS2OBJS += $(CDBLAS2OBJS)
|
DBLAS2OBJS += $(CDBLAS2OBJS)
|
||||||
|
@ -399,7 +402,7 @@ CBAUXOBJS += $(CXERBLAOBJ)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
|
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
|
||||||
SBBLASOBJS = $(SBBLAS1OBJS) $(SBBLAS3OBJS)
|
SBBLASOBJS = $(SBBLAS1OBJS) $(SBBLAS2OBJS) $(SBBLAS3OBJS)
|
||||||
DBLASOBJS = $(DBLAS1OBJS) $(DBLAS2OBJS) $(DBLAS3OBJS)
|
DBLASOBJS = $(DBLAS1OBJS) $(DBLAS2OBJS) $(DBLAS3OBJS)
|
||||||
QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS)
|
QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS)
|
||||||
CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
|
CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
|
||||||
|
@ -507,7 +510,7 @@ ifneq ($(BUILD_COMPLEX16),1)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
FUNCOBJS = $(SBEXTOBJS) $(CXERBLAOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
|
FUNCOBJS = $(SBEXTOBJS) $(CXERBLAOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
|
||||||
$(info FUNCOBJS = {[$(FUNCOBJS)]} )
|
|
||||||
ifdef EXPRECISION
|
ifdef EXPRECISION
|
||||||
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||||
endif
|
endif
|
||||||
|
@ -538,7 +541,7 @@ clean ::
|
||||||
level1 : $(SBEXTOBJS) $(SBBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS)
|
level1 : $(SBEXTOBJS) $(SBBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS)
|
||||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||||
|
|
||||||
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
|
level2 : $(SBBLAS2OBJS) $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
|
||||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||||
|
|
||||||
level3 : $(SBBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS)
|
level3 : $(SBBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS)
|
||||||
|
@ -929,6 +932,11 @@ xgeru.$(SUFFIX) xgeru.$(PSUFFIX) : zger.c
|
||||||
xgerc.$(SUFFIX) xgerc.$(PSUFFIX) : zger.c
|
xgerc.$(SUFFIX) xgerc.$(PSUFFIX) : zger.c
|
||||||
$(CC) -c $(CFLAGS) -DCONJ $< -o $(@F)
|
$(CC) -c $(CFLAGS) -DCONJ $< -o $(@F)
|
||||||
|
|
||||||
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
|
sbgemv.$(SUFFIX) sbgemv.$(PSUFFIX) : sbgemv.c
|
||||||
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
endif
|
||||||
|
|
||||||
ifndef USE_NETLIB_GEMV
|
ifndef USE_NETLIB_GEMV
|
||||||
sgemv.$(SUFFIX) sgemv.$(PSUFFIX): gemv.c
|
sgemv.$(SUFFIX) sgemv.$(PSUFFIX): gemv.c
|
||||||
$(CC) -c $(CFLAGS) -o $(@F) $<
|
$(CC) -c $(CFLAGS) -o $(@F) $<
|
||||||
|
@ -1656,6 +1664,11 @@ cblas_csscal.$(SUFFIX) cblas_csscal.$(PSUFFIX) : zscal.c
|
||||||
cblas_zdscal.$(SUFFIX) cblas_zdscal.$(PSUFFIX) : zscal.c
|
cblas_zdscal.$(SUFFIX) cblas_zdscal.$(PSUFFIX) : zscal.c
|
||||||
$(CC) $(CFLAGS) -DCBLAS -c -DSSCAL $< -o $(@F)
|
$(CC) $(CFLAGS) -DCBLAS -c -DSSCAL $< -o $(@F)
|
||||||
|
|
||||||
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
|
cblas_sbgemv.$(SUFFIX) cblas_sbgemv.$(PSUFFIX) : sbgemv.c
|
||||||
|
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||||
|
endif
|
||||||
|
|
||||||
cblas_sgemv.$(SUFFIX) cblas_sgemv.$(PSUFFIX): gemv.c
|
cblas_sgemv.$(SUFFIX) cblas_sgemv.$(PSUFFIX): gemv.c
|
||||||
$(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $<
|
$(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $<
|
||||||
|
|
||||||
|
|
|
@ -191,7 +191,6 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
//printf("m=%d, n=%d, trans=%d, incx=%d, incy=%d, alpha=%f, beta=%f\n", m, n, trans, incx, incy, alpha, beta);
|
|
||||||
if ((m==0) || (n==0)) return;
|
if ((m==0) || (n==0)) return;
|
||||||
|
|
||||||
lenx = n;
|
lenx = n;
|
||||||
|
|
|
@ -0,0 +1,210 @@
|
||||||
|
/*********************************************************************/
|
||||||
|
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||||
|
/* All rights reserved. */
|
||||||
|
/* */
|
||||||
|
/* Redistribution and use in source and binary forms, with or */
|
||||||
|
/* without modification, are permitted provided that the following */
|
||||||
|
/* conditions are met: */
|
||||||
|
/* */
|
||||||
|
/* 1. Redistributions of source code must retain the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer. */
|
||||||
|
/* */
|
||||||
|
/* 2. Redistributions in binary form must reproduce the above */
|
||||||
|
/* copyright notice, this list of conditions and the following */
|
||||||
|
/* disclaimer in the documentation and/or other materials */
|
||||||
|
/* provided with the distribution. */
|
||||||
|
/* */
|
||||||
|
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||||
|
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||||
|
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||||
|
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||||
|
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||||
|
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||||
|
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||||
|
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||||
|
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||||
|
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||||
|
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||||
|
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||||
|
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||||
|
/* */
|
||||||
|
/* The views and conclusions contained in the software and */
|
||||||
|
/* documentation are those of the authors and should not be */
|
||||||
|
/* interpreted as representing official policies, either expressed */
|
||||||
|
/* or implied, of The University of Texas at Austin. */
|
||||||
|
/*********************************************************************/
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include "common.h"
|
||||||
|
#include "l1param.h"
|
||||||
|
#ifdef FUNCTION_PROFILE
|
||||||
|
#include "functable.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define ERROR_NAME "SBGEMV "
|
||||||
|
|
||||||
|
#ifdef SMP
|
||||||
|
static int (*sbgemv_thread[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG, int) = {
|
||||||
|
sbgemv_thread_n, sbgemv_thread_t,
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef CBLAS
|
||||||
|
|
||||||
|
void NAME(char *TRANS, blasint *M, blasint *N, float *ALPHA, bfloat16 *a, blasint *LDA, bfloat16 *x, blasint *INCX, float *BETA, float *y, blasint *INCY)
|
||||||
|
{
|
||||||
|
char trans = *TRANS;
|
||||||
|
blasint m = *M;
|
||||||
|
blasint n = *N;
|
||||||
|
blasint lda = *LDA;
|
||||||
|
blasint incx = *INCX;
|
||||||
|
blasint incy = *INCY;
|
||||||
|
float alpha = *ALPHA;
|
||||||
|
float beta = *BETA;
|
||||||
|
#ifdef SMP
|
||||||
|
int nthreads;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int (*sbgemv[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG) = {
|
||||||
|
SBGEMV_N, SBGEMV_T,
|
||||||
|
};
|
||||||
|
|
||||||
|
blasint info;
|
||||||
|
blasint lenx, leny;
|
||||||
|
blasint i;
|
||||||
|
|
||||||
|
PRINT_DEBUG_NAME;
|
||||||
|
|
||||||
|
TOUPPER(trans);
|
||||||
|
|
||||||
|
info = 0;
|
||||||
|
|
||||||
|
i = -1;
|
||||||
|
|
||||||
|
if (trans == 'N') {i = 0;}
|
||||||
|
if (trans == 'T') {i = 1;}
|
||||||
|
if (trans == 'R') {i = 0;}
|
||||||
|
if (trans == 'C') {i = 1;}
|
||||||
|
|
||||||
|
if (incy == 0) {info = 11;}
|
||||||
|
if (incx == 0) {info = 8;}
|
||||||
|
if (lda < MAX(1, m)) {info = 6;}
|
||||||
|
if (n < 0) {info = 3;}
|
||||||
|
if (m < 0) {info = 2;}
|
||||||
|
if (i < 0) {info = 1;}
|
||||||
|
|
||||||
|
trans = i;
|
||||||
|
|
||||||
|
if (info != 0) {
|
||||||
|
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasint n, float alpha, bfloat16 *a, blasint lda, bfloat16 *x, blasint incx, float beta, float *y, blasint incy)
|
||||||
|
{
|
||||||
|
blasint lenx, leny;
|
||||||
|
int trans;
|
||||||
|
blasint info, t;
|
||||||
|
#ifdef SMP
|
||||||
|
int nthreads;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int (*sbgemv[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG) = {
|
||||||
|
SBGEMV_N, SBGEMV_T,
|
||||||
|
};
|
||||||
|
|
||||||
|
PRINT_DEBUG_CNAME;
|
||||||
|
|
||||||
|
trans = -1;
|
||||||
|
info = 0;
|
||||||
|
|
||||||
|
if (order == CblasColMajor) { // Column Major
|
||||||
|
if (TransA == CblasNoTrans || TransA == CblasConjNoTrans) {
|
||||||
|
trans = 0;
|
||||||
|
} else if (TransA == CblasTrans || TransA == CblasConjTrans) {
|
||||||
|
trans = 1;
|
||||||
|
}
|
||||||
|
} else { // Row Major
|
||||||
|
if (TransA == CblasNoTrans || TransA == CblasConjNoTrans) {
|
||||||
|
trans = 1;
|
||||||
|
} else if (TransA == CblasTrans || TransA == CblasConjTrans) {
|
||||||
|
trans = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
t = n;
|
||||||
|
n = m;
|
||||||
|
m = t;
|
||||||
|
}
|
||||||
|
|
||||||
|
info = -1;
|
||||||
|
|
||||||
|
if (incy == 0) {info = 11;}
|
||||||
|
if (incx == 0) {info = 8;}
|
||||||
|
if (lda < MAX(1, m)) {info = 6;}
|
||||||
|
if (n < 0) {info = 3;}
|
||||||
|
if (m < 0) {info = 2;}
|
||||||
|
if (trans < 0) {info = 1;}
|
||||||
|
|
||||||
|
if (info >= 0) {
|
||||||
|
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if ((m==0) || (n==0)) return;
|
||||||
|
|
||||||
|
if (trans) {
|
||||||
|
lenx = m;
|
||||||
|
leny = n;
|
||||||
|
} else {
|
||||||
|
lenx = n;
|
||||||
|
leny = m;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (alpha == ZERO) {
|
||||||
|
if (beta != ONE) SCAL_K(leny, 0, 0, beta, y, blasabs(incy), NULL, 0, NULL, 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
IDEBUG_START;
|
||||||
|
FUNCTION_PROFILE_START();
|
||||||
|
|
||||||
|
if (incx < 0) {x -= (lenx - 1) * incx;}
|
||||||
|
if (incy < 0) {y -= (leny - 1) * incy;}
|
||||||
|
|
||||||
|
#ifdef SMP
|
||||||
|
int thread_thres_row = 20480;
|
||||||
|
if (trans) {
|
||||||
|
if (n <= thread_thres_row) {
|
||||||
|
nthreads = 1;
|
||||||
|
} else {
|
||||||
|
nthreads = num_cpu_avail(1);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (m <= thread_thres_row) {
|
||||||
|
nthreads = 1;
|
||||||
|
} else {
|
||||||
|
nthreads = num_cpu_avail(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (nthreads == 1) {
|
||||||
|
#endif
|
||||||
|
(sbgemv[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy);
|
||||||
|
#ifdef SMP
|
||||||
|
} else {
|
||||||
|
(sbgemv_thread[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy, nthreads);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
|
||||||
|
IDEBUG_END;
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
|
@ -5,37 +5,24 @@ endif
|
||||||
TOPDIR = ..
|
TOPDIR = ..
|
||||||
include $(TOPDIR)/Makefile.system
|
include $(TOPDIR)/Makefile.system
|
||||||
|
|
||||||
ifdef HAVE_SSE3
|
|
||||||
CFLAGS += -msse3
|
|
||||||
endif
|
|
||||||
ifdef HAVE_SSSE3
|
|
||||||
CFLAGS += -mssse3
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(C_COMPILER), GCC)
|
|
||||||
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
|
|
||||||
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(ARCH), power)
|
ifeq ($(ARCH), power)
|
||||||
ifeq ($(C_COMPILER), CLANG)
|
ifeq ($(C_COMPILER), CLANG)
|
||||||
override CFLAGS += -fno-integrated-as
|
override CFLAGS += -fno-integrated-as
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
AVX2OPT =
|
AVX2OPT =
|
||||||
ifeq ($(C_COMPILER), GCC)
|
ifeq ($(C_COMPILER), GCC)
|
||||||
# AVX2 support was added in 4.7.0
|
# AVX2 support was added in 4.7.0
|
||||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
|
||||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||||
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
|
|
||||||
AVX2OPT = -mavx2
|
AVX2OPT = -mavx2
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
ifeq ($(C_COMPILER), CLANG)
|
ifeq ($(C_COMPILER), CLANG)
|
||||||
# Any clang posing as gcc 4.2 should be new enough (3.4 or later)
|
# Any clang posing as gcc 4.2 should be new enough (3.4 or later)
|
||||||
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
|
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2)
|
||||||
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 2)
|
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||||
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2), 11)
|
|
||||||
AVX2OPT = -mavx2
|
AVX2OPT = -mavx2
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
@ -44,12 +31,6 @@ ifdef NO_AVX2
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef TARGET_CORE
|
ifdef TARGET_CORE
|
||||||
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
|
|
||||||
override CFLAGS += -msse -msse2 -msse3 -mssse3 -msse4.1
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),KATMAI COPPERMINE BANIAS NORTHWOOD ATHLON OPTERON))
|
|
||||||
override CFLAGS += -msse -msse2
|
|
||||||
endif
|
|
||||||
ifeq ($(TARGET_CORE), COOPERLAKE)
|
ifeq ($(TARGET_CORE), COOPERLAKE)
|
||||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
||||||
ifeq ($(GCCVERSIONGTEQ10), 1)
|
ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||||
|
|
|
@ -48,6 +48,16 @@ ifndef XGEMVTKERNEL
|
||||||
XGEMVTKERNEL = zgemv_t.S
|
XGEMVTKERNEL = zgemv_t.S
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
|
ifndef SBGEMVNKERNEL
|
||||||
|
SBGEMVNKERNEL = ../x86_64/sbgemv_n.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifndef SBGEMVTKERNEL
|
||||||
|
SBGEMVTKERNEL = ../x86_64/sbgemv_t.c
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
### GER ###
|
### GER ###
|
||||||
|
|
||||||
ifndef SGERKERNEL
|
ifndef SGERKERNEL
|
||||||
|
@ -234,6 +244,12 @@ XBLASOBJS += \
|
||||||
xhemv_U$(TSUFFIX).$(SUFFIX) xhemv_L$(TSUFFIX).$(SUFFIX) xhemv_V$(TSUFFIX).$(SUFFIX) xhemv_M$(TSUFFIX).$(SUFFIX) \
|
xhemv_U$(TSUFFIX).$(SUFFIX) xhemv_L$(TSUFFIX).$(SUFFIX) xhemv_V$(TSUFFIX).$(SUFFIX) xhemv_M$(TSUFFIX).$(SUFFIX) \
|
||||||
xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX)
|
xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
|
SBBLASOBJS += \
|
||||||
|
sbgemv_n$(TSUFFIX).$(SUFFIX) \
|
||||||
|
sbgemv_t$(TSUFFIX).$(SUFFIX)
|
||||||
|
endif
|
||||||
|
|
||||||
ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" ""
|
ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" ""
|
||||||
$(KDIR)sgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
|
$(KDIR)sgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
|
||||||
$(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -UTRANS $< -o $@
|
$(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -UTRANS $< -o $@
|
||||||
|
@ -483,4 +499,10 @@ $(KDIR)xhemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_V$(TSUFFIX).$(PSUFFIX) : $(KER
|
||||||
$(KDIR)xhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_M_KERNEL) ../symcopy.h
|
$(KDIR)xhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_M_KERNEL) ../symcopy.h
|
||||||
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@
|
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@
|
||||||
|
|
||||||
|
ifeq ($(BUILD_BFLOAT16),1)
|
||||||
|
$(KDIR)sbgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sbgemv_n$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMVNKERNEL)
|
||||||
|
$(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@
|
||||||
|
$(KDIR)sbgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)sbgemv_t$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMVTKERNEL)
|
||||||
|
$(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
|
@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
|
||||||
i++ ;
|
i++ ;
|
||||||
|
|
||||||
}
|
}
|
||||||
#if !defined(__POWER__)
|
#if !defined(__PPC__)
|
||||||
CREAL(result) = dot[0];
|
CREAL(result) = dot[0];
|
||||||
CIMAG(result) = dot[1];
|
CIMAG(result) = dot[1];
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -34,12 +34,12 @@ SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
DGEMMKERNEL = dgemm_kernel_power10.c
|
DGEMMKERNEL = dgemm_kernel_power10.c
|
||||||
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
DGEMMINCOPY =
|
||||||
DGEMMITCOPY = dgemm_tcopy_16_power8.S
|
DGEMMITCOPY =
|
||||||
DGEMMONCOPY = dgemm_ncopy_4_power8.S
|
DGEMMONCOPY = dgemm_ncopy_8_power10.c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
DGEMMINCOPYOBJ =
|
||||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
DGEMMITCOPYOBJ =
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
@ -69,7 +69,7 @@ STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
|
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
@ -141,23 +141,19 @@ DASUMKERNEL = dasum.c
|
||||||
CASUMKERNEL = casum.c
|
CASUMKERNEL = casum.c
|
||||||
ZASUMKERNEL = zasum.c
|
ZASUMKERNEL = zasum.c
|
||||||
#
|
#
|
||||||
SAXPYKERNEL = saxpy.c
|
SAXPYKERNEL = saxpy_power10.c
|
||||||
DAXPYKERNEL = daxpy_power10.c
|
DAXPYKERNEL = daxpy_power10.c
|
||||||
ifneq ($(GCCVERSIONGTEQ9),1)
|
CAXPYKERNEL = caxpy_power10.c
|
||||||
CAXPYKERNEL = caxpy_power9.S
|
|
||||||
else
|
|
||||||
CAXPYKERNEL = caxpy.c
|
|
||||||
endif
|
|
||||||
ZAXPYKERNEL = zaxpy_power10.c
|
ZAXPYKERNEL = zaxpy_power10.c
|
||||||
#
|
#
|
||||||
SCOPYKERNEL = scopy.c
|
SCOPYKERNEL = scopy_power10.c
|
||||||
DCOPYKERNEL = dcopy_power10.c
|
DCOPYKERNEL = dcopy_power10.c
|
||||||
CCOPYKERNEL = ccopy.c
|
CCOPYKERNEL = ccopy_power10.c
|
||||||
ZCOPYKERNEL = zcopy_power10.c
|
ZCOPYKERNEL = zcopy_power10.c
|
||||||
#
|
#
|
||||||
SDOTKERNEL = sdot.c
|
SDOTKERNEL = sdot_power10.c
|
||||||
DDOTKERNEL = ddot.c
|
DDOTKERNEL = ddot_power10.c
|
||||||
DSDOTKERNEL = sdot.c
|
DSDOTKERNEL = sdot_power10.c
|
||||||
ifneq ($(GCCVERSIONGTEQ9),1)
|
ifneq ($(GCCVERSIONGTEQ9),1)
|
||||||
CDOTKERNEL = cdot_power9.S
|
CDOTKERNEL = cdot_power9.S
|
||||||
else
|
else
|
||||||
|
|
|
@ -0,0 +1,188 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2020, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
/* Advertises that this file supplies caxpy_kernel_8, so code that includes
   it can test HAVE_KERNEL_8 and leave out its own plain C version. */
#define HAVE_KERNEL_8 1
|
||||||
|
/*
 * Inline-assembly kernel for single-precision complex AXPY on contiguous,
 * interleaved (real, imag) data:
 *
 *     y += alpha * x          when CONJ is not defined
 *     y += alpha * conj(x)    when CONJ is defined
 *
 * (Presumably this is the caxpy_microk_power10.c pulled in by the caxpy
 * driver; it relies on the paired vector load/store instructions
 * lxvp/stxvp.)
 *
 * Parameters
 *   n         number of complex elements. The prologue unconditionally loads
 *             one 128-byte block (16 complex floats) from both x and y, every
 *             pass subtracts 16 from n, and the code at label "two" always
 *             finishes one block, so n is expected to be a positive multiple
 *             of 16.
 *   x, y      source and destination vectors; the asm advances its own
 *             copies of both pointers by 128 bytes per block.
 *   alpha_r,
 *   alpha_i   real and imaginary part of the scalar alpha.
 *
 * Method
 *   vs32 holds alpha_r and vs33 holds alpha_i, each replicated across the
 *   vector. mvec carries alternating signs and is multiplied into one of the
 *   two (which one depends on CONJ) so that two multiply-adds per vector
 *   produce the complex product: one with x as loaded (vs40-vs47) and one
 *   with x whose real and imaginary words have been exchanged by xxperm
 *   (vs52-vs59). y is accumulated in vs48-vs51, vs34, vs35, vs38 and vs39.
 *
 *   The loop is software pipelined: x and y for the next block are loaded
 *   while the current block is computed, which is why stores go through a
 *   separate pointer (ytmp, operand 4) that trails the y load pointer
 *   (operand 3).
 */
static void caxpy_kernel_8 (long n, float *x, float *y,
|
||||||
|
float alpha_r, float alpha_i)
|
||||||
|
{
|
||||||
|
/* Sign pattern applied to alpha_i (non-CONJ) or to alpha_r (CONJ); see the
   xvmulsp instructions below. */
#if !defined(CONJ)
|
||||||
|
static const float mvec[4] = { -1.0, 1.0, -1.0, 1.0 };
|
||||||
|
#else
|
||||||
|
static const float mvec[4] = { 1.0, -1.0, 1.0, -1.0 };
|
||||||
|
#endif
|
||||||
|
const float *mvecp = mvec;
|
||||||
|
/* We have to load reverse mask for big endian. */
|
||||||
|
/* __vector unsigned char mask={ 4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}; */
|
||||||
|
|
||||||
|
/* Permute control used by every xxperm below to exchange the real and
   imaginary word of each complex element.
   NOTE(review): only this mask is ever used; the commented-out variant
   above appears to be the big-endian form - confirm whether big-endian
   builds need it selected instead. */
__vector unsigned char mask = { 11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4};
|
||||||
|
/* Written by the asm ("mr %4, %3"); base address for all stxvp stores. */
long ytmp;
|
||||||
|
|
||||||
|
__asm__
|
||||||
|
(
|
||||||
|
/* Bring alpha_r / alpha_i into vs32 / vs33 and replicate word 0 across
   the whole vector. */
"xscvdpspn 32, %7 \n\t"
|
||||||
|
"xscvdpspn 33, %8 \n\t"
|
||||||
|
"xxspltw 32, 32, 0 \n\t"
|
||||||
|
"xxspltw 33, 33, 0 \n\t"
|
||||||
|
"lxvd2x 36, 0, %9 \n\t" // mvec
|
||||||
|
|
||||||
|
#if !defined(CONJ)
|
||||||
|
"xvmulsp 33, 33, 36 \n\t" // alpha_i * mvec
|
||||||
|
#else
|
||||||
|
"xvmulsp 32, 32, 36 \n\t" // alpha_r * mvec
|
||||||
|
#endif
|
||||||
|
/* Operand 9 (mvecp) is tied to the register of operand 4, so mvec had to
   be loaded above before this copy of y into the store pointer
   overwrites that register. */
"mr %4, %3 \n\t"
|
||||||
|
"dcbt 0, %2 \n\t"
|
||||||
|
"dcbt 0, %3 \n\t"
|
||||||
|
|
||||||
|
/* Prologue: load the first 128-byte block of x and y and build the
   swapped copy of x. */
"lxvp 40, 0(%2) \n\t" // x0
|
||||||
|
"lxvp 42, 32(%2) \n\t" // x2
|
||||||
|
"lxvp 48, 0(%3) \n\t" // y0
|
||||||
|
"lxvp 50, 32(%3) \n\t" // y2
|
||||||
|
|
||||||
|
"xxperm 52, 40, %x10 \n\t" // exchange real and imag part
|
||||||
|
"xxperm 53, 41, %x10 \n\t" // exchange real and imag part
|
||||||
|
"xxperm 54, 42, %x10 \n\t" // exchange real and imag part
|
||||||
|
"xxperm 55, 43, %x10 \n\t" // exchange real and imag part
|
||||||
|
|
||||||
|
"lxvp 44, 64(%2) \n\t" // x4
|
||||||
|
"lxvp 46, 96(%2) \n\t" // x6
|
||||||
|
"lxvp 34, 64(%3) \n\t" // y4
|
||||||
|
"lxvp 38, 96(%3) \n\t" // y6
|
||||||
|
|
||||||
|
"xxperm 56, 44, %x10 \n\t" // exchange real and imag part
|
||||||
|
"xxperm 57, 45, %x10 \n\t" // exchange real and imag part
|
||||||
|
"xxperm 58, 46, %x10 \n\t" // exchange real and imag part
|
||||||
|
"xxperm 59, 47, %x10 \n\t" // exchange real and imag part
|
||||||
|
|
||||||
|
"addi %2, %2, 128 \n\t"
|
||||||
|
"addi %3, %3, 128 \n\t"
|
||||||
|
|
||||||
|
/* n -= 16; if nothing is left beyond the block already loaded, go
   straight to the final compute-and-store at "two". */
"addic. %1, %1, -16 \n\t"
|
||||||
|
"ble two%= \n\t"
|
||||||
|
|
||||||
|
".align 5 \n"
|
||||||
|
/* Main loop: finish the block held in registers while loading the next
   block of x, store the result through %4, then load the next y. */
"one%=: \n\t"
|
||||||
|
|
||||||
|
"xvmaddasp 48, 40, 32 \n\t" // alpha_r * x0_r , alpha_r * x0_i
|
||||||
|
"xvmaddasp 49, 41, 32 \n\t"
|
||||||
|
"lxvp 40, 0(%2) \n\t" // x0
|
||||||
|
"xvmaddasp 50, 42, 32 \n\t"
|
||||||
|
"xvmaddasp 51, 43, 32 \n\t"
|
||||||
|
"lxvp 42, 32(%2) \n\t" // x2
|
||||||
|
|
||||||
|
"xvmaddasp 34, 44, 32 \n\t"
|
||||||
|
"xvmaddasp 35, 45, 32 \n\t"
|
||||||
|
"lxvp 44, 64(%2) \n\t" // x4
|
||||||
|
"xvmaddasp 38, 46, 32 \n\t"
|
||||||
|
"xvmaddasp 39, 47, 32 \n\t"
|
||||||
|
"lxvp 46, 96(%2) \n\t" // x6
|
||||||
|
|
||||||
|
"xvmaddasp 48, 52, 33 \n\t" // alpha_i * x0_i , alpha_i * x0_r
|
||||||
|
"addi %2, %2, 128 \n\t"
|
||||||
|
"xvmaddasp 49, 53, 33 \n\t"
|
||||||
|
"xvmaddasp 50, 54, 33 \n\t"
|
||||||
|
"xvmaddasp 51, 55, 33 \n\t"
|
||||||
|
|
||||||
|
"xvmaddasp 34, 56, 33 \n\t"
|
||||||
|
"xvmaddasp 35, 57, 33 \n\t"
|
||||||
|
"xvmaddasp 38, 58, 33 \n\t"
|
||||||
|
"xvmaddasp 39, 59, 33 \n\t"
|
||||||
|
|
||||||
|
"stxvp 48, 0(%4) \n\t"
|
||||||
|
"stxvp 50, 32(%4) \n\t"
|
||||||
|
"stxvp 34, 64(%4) \n\t"
|
||||||
|
"stxvp 38, 96(%4) \n\t"
|
||||||
|
|
||||||
|
"addi %4, %4, 128 \n\t"
|
||||||
|
"xxperm 52, 40, %x10 \n\t" // exchange real and imag part
|
||||||
|
"xxperm 53, 41, %x10 \n\t" // exchange real and imag part
|
||||||
|
|
||||||
|
"lxvp 48, 0(%3) \n\t" // y0
|
||||||
|
"xxperm 54, 42, %x10 \n\t" // exchange real and imag part
|
||||||
|
"xxperm 55, 43, %x10 \n\t" // exchange real and imag part
|
||||||
|
"lxvp 50, 32(%3) \n\t" // y2
|
||||||
|
|
||||||
|
"xxperm 56, 44, %x10 \n\t" // exchange real and imag part
|
||||||
|
"xxperm 57, 45, %x10 \n\t" // exchange real and imag part
|
||||||
|
"lxvp 34, 64(%3) \n\t" // y4
|
||||||
|
"xxperm 58, 46, %x10 \n\t" // exchange real and imag part
|
||||||
|
"xxperm 59, 47, %x10 \n\t" // exchange real and imag part
|
||||||
|
"lxvp 38, 96(%3) \n\t" // y6
|
||||||
|
|
||||||
|
"addi %3, %3, 128 \n\t"
|
||||||
|
|
||||||
|
"addic. %1, %1, -16 \n\t"
|
||||||
|
"bgt one%= \n"
|
||||||
|
|
||||||
|
/* Epilogue: compute and store the last block already held in registers;
   no further loads are issued. */
"two%=: \n\t"
|
||||||
|
"xvmaddasp 48, 40, 32 \n\t" // alpha_r * x0_r , alpha_r * x0_i
|
||||||
|
"xvmaddasp 49, 41, 32 \n\t"
|
||||||
|
"xvmaddasp 50, 42, 32 \n\t"
|
||||||
|
"xvmaddasp 51, 43, 32 \n\t"
|
||||||
|
|
||||||
|
"xvmaddasp 34, 44, 32 \n\t"
|
||||||
|
"xvmaddasp 35, 45, 32 \n\t"
|
||||||
|
"xvmaddasp 38, 46, 32 \n\t"
|
||||||
|
"xvmaddasp 39, 47, 32 \n\t"
|
||||||
|
|
||||||
|
"xvmaddasp 48, 52, 33 \n\t" // alpha_i * x0_i , alpha_i * x0_r
|
||||||
|
"xvmaddasp 49, 53, 33 \n\t"
|
||||||
|
"xvmaddasp 50, 54, 33 \n\t"
|
||||||
|
"xvmaddasp 51, 55, 33 \n\t"
|
||||||
|
|
||||||
|
"xvmaddasp 34, 56, 33 \n\t"
|
||||||
|
"xvmaddasp 35, 57, 33 \n\t"
|
||||||
|
"xvmaddasp 38, 58, 33 \n\t"
|
||||||
|
"xvmaddasp 39, 59, 33 \n\t"
|
||||||
|
|
||||||
|
"stxvp 48, 0(%4) \n\t"
|
||||||
|
"stxvp 50, 32(%4) \n\t"
|
||||||
|
"stxvp 34, 64(%4) \n\t"
|
||||||
|
"stxvp 38, 96(%4) \n\t"
|
||||||
|
|
||||||
|
/* Assembler comment line recording the operand-to-register mapping in the
   generated assembly. */
"#n=%1 x=%5=%2 y=%0=%3 alpha=(%7,%8) mvecp=%6=%9 ytmp=%4\n"
|
||||||
|
/* Outputs (operands 0-4). */
:
|
||||||
|
"+m" (*y), // 0
|
||||||
|
"+r" (n), // 1
|
||||||
|
"+b" (x), // 2
|
||||||
|
"+b" (y), // 3
|
||||||
|
"=b" (ytmp) // 4
|
||||||
|
/* Inputs (operands 5-10). */
:
|
||||||
|
"m" (*x), // 5
|
||||||
|
"m" (*mvecp), // 6
|
||||||
|
"d" (alpha_r), // 7
|
||||||
|
"d" (alpha_i), // 8
|
||||||
|
"4" (mvecp), // 9
|
||||||
|
"wa" (mask) // 10
|
||||||
|
/* Clobbers: the condition field set by addic. and every vector-scalar
   register named explicitly in the template. */
:
|
||||||
|
"cr0",
|
||||||
|
"vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39",
|
||||||
|
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47",
|
||||||
|
"vs48","vs49","vs50","vs51","vs52","vs53","vs54","vs55",
|
||||||
|
"vs56","vs57","vs58","vs59"
|
||||||
|
);
|
||||||
|
}
|
|
@ -0,0 +1,126 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2020, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||||
|
#include "caxpy_microk_power10.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef HAVE_KERNEL_8
|
||||||
|
|
||||||
|
/*
 * Portable C version of the contiguous complex AXPY kernel.
 *
 * Updates y in place with y += alpha * x (or alpha * conj(x) when CONJ is
 * defined), where alpha = da_r + i*da_i and both vectors store complex
 * values as interleaved (real, imag) pairs.
 *
 * n is a count of complex elements. Two elements (four FLOATs) are handled
 * per pass, so n is expected to be even; an odd n would make the last pass
 * touch one element beyond n.
 */
static void caxpy_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT da_r,FLOAT da_i)
{
    FLOAT *src = x;      /* walks x, four FLOATs per pass */
    FLOAT *dst = y;      /* walks y in step with src */
    BLASLONG done;       /* complex elements processed so far */

    for (done = 0; done < n; done += 2)
    {
#if !defined(CONJ)
        dst[0] += ( da_r * src[0] - da_i * src[1] ) ;
        dst[1] += ( da_r * src[1] + da_i * src[0] ) ;
        dst[2] += ( da_r * src[2] - da_i * src[3] ) ;
        dst[3] += ( da_r * src[3] + da_i * src[2] ) ;
#else
        dst[0] += ( da_r * src[0] + da_i * src[1] ) ;
        dst[1] -= ( da_r * src[1] - da_i * src[0] ) ;
        dst[2] += ( da_r * src[2] + da_i * src[3] ) ;
        dst[3] -= ( da_r * src[3] - da_i * src[2] ) ;
#endif

        src += 4;
        dst += 4;
    }
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Complex AXPY driver: y += alpha * x, or y += alpha * conj(x) when CONJ is
 * defined, with alpha = da_r + i*da_i.
 *
 * n             number of complex elements; nothing is done when n <= 0.
 * da_r, da_i    real and imaginary part of alpha.
 * x, inc_x      source vector and its stride, counted in complex elements.
 * y, inc_y      destination vector and its stride, counted in complex
 *               elements.
 * dummy0, dummy1, dummy, dummy2
 *               not referenced by this function.
 *
 * Always returns 0.
 *
 * When both strides are 1, the largest multiple of 16 elements is handed to
 * caxpy_kernel_8 and the remaining (at most 15) elements are finished with
 * scalar code. Every other stride combination is handled by a scalar loop.
 */
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{
    BLASLONG k;               /* index of the current complex element */
    BLASLONG px = 0, py = 0;  /* FLOAT offsets into x and y */

    if ( n <= 0 ) return 0;

    if ( inc_x != 1 || inc_y != 1 )
    {
        /* Strided case: each complex element occupies two FLOATs. */
        const BLASLONG step_x = 2 * inc_x;
        const BLASLONG step_y = 2 * inc_y;

        for ( k = 0; k < n; k++ )
        {
#if !defined(CONJ)
            y[py]   += ( da_r * x[px]   - da_i * x[px+1] ) ;
            y[py+1] += ( da_r * x[px+1] + da_i * x[px]   ) ;
#else
            y[py]   += ( da_r * x[px]   + da_i * x[px+1] ) ;
            y[py+1] -= ( da_r * x[px+1] - da_i * x[px]   ) ;
#endif
            px += step_x;
            py += step_y;
        }
        return 0;
    }

    /* Contiguous case: bulk of the work goes to the blocked kernel. */
    k = n & -16;
    if ( k != 0 )
        caxpy_kernel_8 (k, x, y, da_r, da_i);

    /* Scalar tail for the elements the kernel did not cover. */
    for ( px = 2 * k; k < n; k++, px += 2 )
    {
#if !defined(CONJ)
        y[px]   += ( da_r * x[px]   - da_i * x[px+1] ) ;
        y[px+1] += ( da_r * x[px+1] + da_i * x[px]   ) ;
#else
        y[px]   += ( da_r * x[px]   + da_i * x[px+1] ) ;
        y[px+1] -= ( da_r * x[px+1] - da_i * x[px]   ) ;
#endif
    }
    return 0;
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,132 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2013-2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||||
|
#include "copy_microk_power10.c"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef HAVE_KERNEL
|
||||||
|
|
||||||
|
/*
 * Portable C version of the contiguous complex copy kernel.
 *
 * Copies n complex elements (2*n FLOATs) from x to y in groups of four
 * elements. n is expected to be a multiple of 4; otherwise the last group
 * runs past n.
 *
 * Each group of eight FLOATs is read completely before any of it is
 * written.
 */
static void copy_kernel(BLASLONG n, FLOAT *x, FLOAT *y)
{
    FLOAT stage[8];      /* one group of four complex values */
    BLASLONG done;       /* complex elements copied so far */
    int k;

    for (done = 0; done < n; done += 4)
    {
        for (k = 0; k < 8; k++)
            stage[k] = x[k];

        for (k = 0; k < 8; k++)
            y[k] = stage[k];

        x += 8;
        y += 8;
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Complex copy driver: y := x for n complex elements stored as interleaved
 * (real, imag) FLOAT pairs.
 *
 * n          number of complex elements; nothing is done when n <= 0.
 * x, inc_x   source vector and its stride, counted in complex elements.
 * y, inc_y   destination vector and its stride, counted in complex elements.
 *
 * Always returns 0.
 *
 * When both strides are 1, the largest multiple of 64 elements is handed to
 * copy_kernel and the remaining (at most 63) elements are copied one by one.
 * Any other stride combination is copied element by element.
 */
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
    BLASLONG i=0;
    BLASLONG ix=0,iy=0;

    if ( n <= 0 ) return(0);

    if ( (inc_x == 1) && (inc_y == 1 ))
    {
        BLASLONG n1 = n & -64;

        if ( n1 > 0 )
        {
            copy_kernel(n1, x, y);
            i=n1;
            /* two FLOATs per complex element */
            ix=n1*2;
            iy=n1*2;
        }

        /* Tail: elements not covered by the blocked kernel. */
        while(i < n)
        {
            /* The source is indexed with ix. The previous code read x[iy]
               here, which only worked because ix and iy happen to be equal
               on this path. */
            y[iy] = x[ix] ;
            y[iy+1] = x[ix+1] ;
            ix+=2;
            iy+=2;
            i++ ;
        }
    }
    else
    {
        /* Strides are given in complex elements; convert to FLOAT offsets. */
        BLASLONG inc_x2 = 2 * inc_x;
        BLASLONG inc_y2 = 2 * inc_y;

        while(i < n)
        {
            y[iy] = x[ix] ;
            y[iy+1] = x[ix+1] ;
            ix += inc_x2 ;
            iy += inc_y2 ;
            i++ ;
        }
    }

    return(0);
}
|
||||||
|
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue