Merge branch 'develop' into risc-v

This commit is contained in:
Xianyi Zhang 2020-11-10 09:18:25 +08:00
commit 913cc9a4ca
162 changed files with 8817 additions and 4767 deletions

View File

@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
project(OpenBLAS C ASM) project(OpenBLAS C ASM)
set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3) set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 10.dev) set(OpenBLAS_PATCH_VERSION 12.dev)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
# Adhere to GNU filesystem layout conventions # Adhere to GNU filesystem layout conventions

View File

@ -1,4 +1,102 @@
OpenBLAS ChangeLog OpenBLAS ChangeLog
====================================================================
Version 0.3.12
24-Oct-2020
common:
* Fixed missing BLAS/LAPACK functions (inadvertently dropped during
the build system restructuring)
* Fixed argument conversion macro in LAPACKE_zgesvdq (LAPACK #458)
POWER:
* Added optimized SCOPY/CCOPY kernels for POWER10
* Increased and unified the default size of the GEMM BUFFER
* Fixed building for POWER10 in DYNAMIC_ARCH mode
* POWER10 compatibility test now checks binutils version as well
* Cleaned up compiler warnings
x86_64:
* corrected compiler version checks for AVX2 compatibility
* added compiler option -mavx2 for building with flang
* fixed direct SGEMM pathway for small matrix sizes (broken by
the code refactoring in 0.3.11)
* fixed unhandled partial register clobbers in several kernels
for AXPY,DOT,GEMV_N and GEMV_T flagged by gcc10 tree-vectorizer
ARMV8:
* improved Apple Vortex support to include cross-compiling
====================================================================
Version 0.3.11
17-Oct-2020
common:
* API change:
the newly added BFLOAT16 functions were renamed to use the
letter "B" instead of "H" to avoid potential confusion with
the IEEE "half precision float" type, i.e. the 0.3.10
SHGEMM is now SBGEMM and the corresponding build option
was changed from "BUILD_HALF" to "BUILD_BFLOAT16".
* Reduced the default BLAS3_MEM_ALLOC_THRESHOLD (used as an upper
limit for placing temporary arrays on the stack) to be compatible
with a stack size of 1mb (as imposed by the JAVA runtime library)
* Added mixed-precision dot function SBDOT and utility functions
shstobf16, shdtobf16, sbf16tos and dbf16tod to convert between
single or double precision float arrays and bfloat16 arrays
* Fixed prototypes of LAPACK_?ggsvp and LAPACK_?ggsvd functions
in lapack.h
* Fixed underflow and rounding errors in LAPACK SLANV2 and DLANV2
(causing miscalculations in e.g. SHSEQR/DHSEQR, LAPACK issue #263)
* Fixed workspace calculation in LAPACK ?GELQ (LAPACK issue #415)
* Fixed several bugs in the LAPACK testsuite
* Improved performance of TRMM and TRSM for certain problem sizes
* Fixed infinite recursions and workspace miscalculations in ReLAPACK
* CMAKE builds no longer require pkg-config for creating the .pc file
* Makefile builds no longer misread NO_CBLAS=0 or NO_LAPACK=0 as
enabling these options
* Fixed detection of gfortran when invoked through an mpi wrapper
* Improve thread reinitialization performance with OpenMP after a fork
* Added support for building only the subset of the library required
for a particular precision by specifying BUILD_SINGLE, BUILD_DOUBLE
* Optional function name prefixes and suffixes are now correctly
reflected in the generated cblas.h
* Added CMAKE build support for the LAPACK and multithreading tests
POWER:
* Added optimized support for POWER10
* Added support for compiling for POWER8 in 32bit mode
* Added support for compilation with LLVM/clang
* Added support for compilation with NVIDIA/PGI compilers
* Fixed building on big-endian POWER8
* Fixed miscompilation of ZDOTC by gcc10
* Fixed alignment errors in the POWER8 SAXPY kernel
* Improved CPU detection on AIX
* Supported building with older compilers on POWER9
x86_64:
* Added support for Intel Cooperlake
* Added autodetection of AMD Renoir/Matisse/Zen3 cpus
* Added autodetection of Intel Comet Lake cpus
* Reimplemented ?sum, ?dot and daxpy using universal intrinsics
* Reset the fpu state before using the fpu on Windows as a workaround
for a problem introduced in Windows 10 build 19041 (a.k.a. SDK 2004)
* Fixed potentially undefined behaviour in the dot and gemv_t kernels
* Fixed a potential segmentation fault in DYNAMIC_ARCH builds
* Fixed building for ZEN with PGI/NVIDIA and AMD AOCC compilers
ARMV7:
* Fixed cpu detection on BSD-like systems
ARMV8:
* Added preliminary support for Apple Vortex cpus
* Added support for the Cavium ThunderX3T110 cpu
* Fixed cpu detection on BSD-like systems
* Fixed compilation in -std=C18 mode
IBM Z:
* Added support for compiling with the clang compiler
* Improved GEMM performance on Z14
==================================================================== ====================================================================
Version 0.3.10 Version 0.3.10
14-Jun-2020 14-Jun-2020

View File

@ -12,3 +12,8 @@ ifeq ($(CORE), ARMV6)
CCOMMON_OPT += -mfpu=vfp CCOMMON_OPT += -mfpu=vfp
FCOMMON_OPT += -mfpu=vfp FCOMMON_OPT += -mfpu=vfp
endif endif
ifdef HAVE_NEON
CCOMMON_OPT += -mfpu=neon
FCOMMON_OPT += -mfpu=neon
endif

View File

@ -10,7 +10,7 @@ USE_OPENMP = 1
endif endif
ifeq ($(CORE), POWER10) ifeq ($(CORE), POWER10)
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
endif endif

View File

@ -3,7 +3,7 @@
# #
# This library's version # This library's version
VERSION = 0.3.10.dev VERSION = 0.3.12.dev
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@ -295,10 +295,13 @@ COMMON_PROF = -pg
# the below is not yet configurable, use cmake if you need to build only select types # By default the library contains BLAS functions (and LAPACK if selected) for all input types.
BUILD_SINGLE = 1 # To build a smaller library supporting e.g. only single precision real (SGEMM etc.) or only
BUILD_DOUBLE = 1 # the functions for complex numbers, uncomment the desired type(s) below
BUILD_COMPLEX = 1 # BUILD_SINGLE = 1
BUILD_COMPLEX16 = 1 # BUILD_DOUBLE = 1
# BUILD_COMPLEX = 1
# BUILD_COMPLEX16 = 1
#
# End of user configuration # End of user configuration
# #

View File

@ -6,7 +6,7 @@
INCLUDED = 1 INCLUDED = 1
ifndef TOPDIR ifndef TOPDIR
TOPDIR = . TOPDIR = .
endif endif
# If ARCH is not set, we use the host system's architecture for getarch compile options. # If ARCH is not set, we use the host system's architecture for getarch compile options.
@ -93,6 +93,12 @@ endif
ifdef TARGET ifdef TARGET
GETARCH_FLAGS := -DFORCE_$(TARGET) GETARCH_FLAGS := -DFORCE_$(TARGET)
GETARCH_FLAGS += -DUSER_TARGET GETARCH_FLAGS += -DUSER_TARGET
ifeq ($(TARGET), GENERIC)
ifeq ($(DYNAMIC_ARCH), 1)
override NO_EXPRECISION=1
export NO_EXPRECiSION
endif
endif
endif endif
# Force fallbacks for 32bit # Force fallbacks for 32bit
@ -246,6 +252,22 @@ DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)"
ifndef TARGET_CORE ifndef TARGET_CORE
include $(TOPDIR)/Makefile.conf include $(TOPDIR)/Makefile.conf
else else
HAVE_NEON=
HAVE_VFP=
HAVE_VFPV3=
HAVE_VFPV4=
HAVE_MMX=
HAVE_SSE=
HAVE_SSE2=
HAVE_SSE3=
HAVE_SSSE3=
HAVE_SSE4_1=
HAVE_SSE4_2=
HAVE_SSE4A=
HAVE_SSE5=
HAVE_AVX=
HAVE_AVX2=
HAVE_FMA3=
include $(TOPDIR)/Makefile_kernel.conf include $(TOPDIR)/Makefile_kernel.conf
endif endif
@ -319,6 +341,7 @@ ifeq ($(GCCVERSIONGTEQ7),1)
else else
GCCDUMPVERSION_PARAM := -dumpversion GCCDUMPVERSION_PARAM := -dumpversion
endif endif
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 1)
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2) GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7) GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
endif endif
@ -641,6 +664,7 @@ DYNAMIC_CORE += POWER8
ifneq ($(C_COMPILER), GCC) ifneq ($(C_COMPILER), GCC)
DYNAMIC_CORE += POWER9 DYNAMIC_CORE += POWER9
DYNAMIC_CORE += POWER10 DYNAMIC_CORE += POWER10
CCOMMON_OPT += -DHAVE_P10_SUPPORT
endif endif
ifeq ($(C_COMPILER), GCC) ifeq ($(C_COMPILER), GCC)
ifeq ($(GCCVERSIONGT5), 1) ifeq ($(GCCVERSIONGT5), 1)
@ -648,11 +672,14 @@ DYNAMIC_CORE += POWER9
else else
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.) $(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
endif endif
ifeq ($(GCCVERSIONGTEQ11), 1) LDVERSIONGTEQ35 := $(shell expr `ld --version | head -1 | cut -f2 -d "." | cut -f1 -d "-"` >= 35)
ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11)
DYNAMIC_CORE += POWER10 DYNAMIC_CORE += POWER10
CCOMMON_OPT += -DHAVE_P10_SUPPORT
else ifeq ($(GCCVERSIONGTEQ10), 1) else ifeq ($(GCCVERSIONGTEQ10), 1)
ifeq ($(GCCMINORVERSIONGTEQ2), 1) ifeq ($(GCCMINORVERSIONGTEQ2)$(LDVERSIONGTEQ35), 11)
DYNAMIC_CORE += POWER10 DYNAMIC_CORE += POWER10
CCOMMON_OPT += -DHAVE_P10_SUPPORT
endif endif
else else
$(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.) $(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
@ -853,7 +880,7 @@ CCOMMON_OPT += -DF_INTERFACE_FLANG
FCOMMON_OPT += -Mrecursive -Kieee FCOMMON_OPT += -Mrecursive -Kieee
ifeq ($(OSNAME), Linux) ifeq ($(OSNAME), Linux)
ifeq ($(ARCH), x86_64) ifeq ($(ARCH), x86_64)
FLANG_VENDOR := $(shell expr `$(FC) --version|cut -f 1 -d "."|head -1`) FLANG_VENDOR := $(shell `$(FC) --version|cut -f 1 -d "."|head -1`)
ifeq ($(FLANG_VENDOR),AOCC) ifeq ($(FLANG_VENDOR),AOCC)
FCOMMON_OPT += -fno-unroll-loops FCOMMON_OPT += -fno-unroll-loops
endif endif
@ -1515,6 +1542,8 @@ export HAVE_SSE4_2
export HAVE_SSE4A export HAVE_SSE4A
export HAVE_SSE5 export HAVE_SSE5
export HAVE_AVX export HAVE_AVX
export HAVE_AVX2
export HAVE_FMA3
export HAVE_VFP export HAVE_VFP
export HAVE_VFPV3 export HAVE_VFPV3
export HAVE_VFPV4 export HAVE_VFPV4

View File

@ -9,9 +9,9 @@ endif
endif endif
ifdef HAVE_SSE3 ifdef HAVE_SSE3
ifndef DYNAMIC_ARCH
CCOMMON_OPT += -msse3 CCOMMON_OPT += -msse3
FCOMMON_OPT += -msse3 FCOMMON_OPT += -msse3
endif
ifdef HAVE_SSSE3 ifdef HAVE_SSSE3
CCOMMON_OPT += -mssse3 CCOMMON_OPT += -mssse3
FCOMMON_OPT += -mssse3 FCOMMON_OPT += -mssse3
@ -20,7 +20,17 @@ ifdef HAVE_SSE4_1
CCOMMON_OPT += -msse4.1 CCOMMON_OPT += -msse4.1
FCOMMON_OPT += -msse4.1 FCOMMON_OPT += -msse4.1
endif endif
ifdef HAVE_AVX
CCOMMON_OPT += -mavx
FCOMMON_OPT += -mavx
endif endif
ifdef HAVE_AVX2
CCOMMON_OPT += -mavx2
FCOMMON_OPT += -mavx2
endif
ifdef HAVE_FMA3
CCOMMON_OPT += -mfma
FCOMMON_OPT += -mfma
endif endif
ifeq ($(CORE), SKYLAKEX) ifeq ($(CORE), SKYLAKEX)
@ -47,8 +57,6 @@ ifndef DYNAMIC_ARCH
ifndef NO_AVX512 ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC) ifeq ($(C_COMPILER), GCC)
# cooperlake support was added in 10.1 # cooperlake support was added in 10.1
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 1)
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11) ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
CCOMMON_OPT += -march=cooperlake CCOMMON_OPT += -march=cooperlake
FCOMMON_OPT += -march=cooperlake FCOMMON_OPT += -march=cooperlake
@ -68,24 +76,31 @@ endif
endif endif
endif endif
ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE)) ifdef HAVE_AVX2
ifndef DYNAMIC_ARCH
ifndef NO_AVX2 ifndef NO_AVX2
ifeq ($(C_COMPILER), GCC) ifeq ($(C_COMPILER), GCC)
# AVX2 support was added in 4.7.0 # AVX2 support was added in 4.7.0
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4) GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7) ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11) CCOMMON_OPT += -mavx2
endif
else
ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -mavx2 CCOMMON_OPT += -mavx2
endif endif
endif endif
ifeq ($(F_COMPILER), GFORTRAN) ifeq ($(F_COMPILER), GFORTRAN)
# AVX2 support was added in 4.7.0 # AVX2 support was added in 4.7.0
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4) GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7) GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11) GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
FCOMMON_OPT += -mavx2 FCOMMON_OPT += -mavx2
endif endif
else
ifeq ($(F_COMPILER), FLANG)
FCOMMON_OPT += -mavx2
endif endif
endif endif
endif endif

View File

@ -25,125 +25,73 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef AMAX #undef AMAX
#ifdef COMPLEX #ifdef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define AMAX BLASFUNC(dzamax) #define AMAX BLASFUNC(dzamax)
#else #else
#define AMAX BLASFUNC(scamax) #define AMAX BLASFUNC(scamax)
#endif #endif
#else #else
#ifdef DOUBLE #ifdef DOUBLE
#define AMAX BLASFUNC(damax) #define AMAX BLASFUNC(damax)
#else #else
#define AMAX BLASFUNC(samax) #define AMAX BLASFUNC(samax)
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__) int main(int argc, char *argv[])
{
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
blasint m, i; blasint m, i;
blasint inc_x=1; blasint inc_x = 1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1;
int to = 200;
int step = 1;
int from = 1; double time1, timeg;
int to = 200;
int step = 1;
struct timeval start, stop; argc--;
double time1,timeg; argv++;
argc--;argv++; if (argc > 0)
{
from = atol(*argv);
argc--;
argv++;
}
if (argc > 0)
{
to = MAX(atol(*argv), from);
argc--;
argv++;
}
if (argc > 0)
{
step = atol(*argv);
argc--;
argv++;
}
if (argc > 0) { from = atol(*argv); argc--; argv++;} if ((p = getenv("OPENBLAS_LOOPS")))
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} loops = atoi(p);
if (argc > 0) { step = atol(*argv); argc--; argv++;} if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p);
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
{
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ fprintf(stderr, "Out of Memory!!\n");
fprintf(stderr,"Out of Memory!!\n");exit(1); exit(1);
} }
#ifdef __linux #ifdef __linux
@ -152,37 +100,31 @@ int main(int argc, char *argv[]){
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step) for (m = from; m <= to; m += step)
{ {
timeg=0; timeg = 0;
fprintf(stderr, " %6d : ", (int)m);
fprintf(stderr, " %6d : ", (int)m); for (l = 0; l < loops; l++)
{
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
{
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
}
for (l=0; l<loops; l++) begin();
{ AMAX(&m, x, &inc_x);
end();
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ timeg += getsec();
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
AMAX (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
} }
return 0; return 0;

View File

@ -25,124 +25,73 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef AMIN #undef AMIN
#ifdef COMPLEX #ifdef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define AMIN BLASFUNC(dzamin) #define AMIN BLASFUNC(dzamin)
#else #else
#define AMIN BLASFUNC(scamin) #define AMIN BLASFUNC(scamin)
#endif #endif
#else #else
#ifdef DOUBLE #ifdef DOUBLE
#define AMIN BLASFUNC(damin) #define AMIN BLASFUNC(damin)
#else #else
#define AMIN BLASFUNC(samin) #define AMIN BLASFUNC(samin)
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__) int main(int argc, char *argv[])
{
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
blasint m, i; blasint m, i;
blasint inc_x=1; blasint inc_x = 1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop; double time1, timeg;
double time1,timeg;
argc--;argv++; argc--;
argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0)
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} {
if (argc > 0) { step = atol(*argv); argc--; argv++;} from = atol(*argv);
argc--;
argv++;
}
if (argc > 0)
{
to = MAX(atol(*argv), from);
argc--;
argv++;
}
if (argc > 0)
{
step = atol(*argv);
argc--;
argv++;
}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); if ((p = getenv("OPENBLAS_LOOPS")))
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
fprintf(stderr,"Out of Memory!!\n");exit(1); {
fprintf(stderr, "Out of Memory!!\n");
exit(1);
} }
#ifdef __linux #ifdef __linux
@ -151,39 +100,35 @@ int main(int argc, char *argv[]){
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step) for (m = from; m <= to; m += step)
{ {
timeg=0; timeg = 0;
fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
for (l = 0; l < loops; l++)
{
for (l=0; l<loops; l++) for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
{ {
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
}
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ begin();
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0); AMIN(&m, x, &inc_x);
AMIN (&m, x, &inc_x); end();
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
timeg += getsec();
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
} }
return 0; return 0;

View File

@ -25,132 +25,74 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef ASUM #undef ASUM
#ifdef COMPLEX #ifdef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define ASUM BLASFUNC(dzasum) #define ASUM BLASFUNC(dzasum)
#else #else
#define ASUM BLASFUNC(scasum) #define ASUM BLASFUNC(scasum)
#endif #endif
#else #else
#ifdef DOUBLE #ifdef DOUBLE
#define ASUM BLASFUNC(dasum) #define ASUM BLASFUNC(dasum)
#else #else
#define ASUM BLASFUNC(sasum) #define ASUM BLASFUNC(sasum)
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__) int main(int argc, char *argv[])
{
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
FLOAT result; FLOAT result;
blasint m, i; blasint m, i;
blasint inc_x=1; blasint inc_x = 1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
struct timeval start, stop;
double time1,timeg;
#else
struct timespec start = { 0, 0 }, stop = { 0, 0 };
double time1, timeg; double time1, timeg;
#endif
argc--;argv++; argc--;
argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;} if (argc > 0)
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} {
if (argc > 0) { step = atol(*argv); argc--; argv++;} from = atol(*argv);
argc--;
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); argv++;
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); }
if (argc > 0)
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); {
to = MAX(atol(*argv), from);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ argc--;
fprintf(stderr,"Out of Memory!!\n");exit(1); argv++;
}
if (argc > 0)
{
step = atol(*argv);
argc--;
argv++;
} }
if ((p = getenv("OPENBLAS_LOOPS")))
loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
{
fprintf(stderr, "Out of Memory!!\n");
exit(1);
}
#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
@ -158,45 +100,33 @@ int main(int argc, char *argv[]){
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step) for (m = from; m <= to; m += step)
{ {
timeg=0; timeg = 0;
fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
for (l=0; l<loops; l++) for (l = 0; l < loops; l++)
{ {
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
gettimeofday( &start, (struct timezone *)0);
#else
clock_gettime(CLOCK_REALTIME, &start);
#endif
result = ASUM (&m, x, &inc_x);
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
clock_gettime(CLOCK_REALTIME, &stop);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
#else
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
#endif
timeg += time1;
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
{
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
}
begin();
result = ASUM(&m, x, &inc_x);
end();
timeg += getsec();
} }
if (loops >1) if (loops > 1)
timeg /= loops; timeg /= loops;
#ifdef COMPLEX #ifdef COMPLEX
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg); fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
#else #else
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg); fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
#endif #endif
} }
return 0; return 0;

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef AXPBY #undef AXPBY
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x, *y; FLOAT *x, *y;
@ -129,7 +58,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -176,16 +104,10 @@ int main(int argc, char *argv[]){
for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
gettimeofday( &start, (struct timezone *)0); begin();
AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y ); AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y );
end();
gettimeofday( &stop, (struct timezone *)0); timeg += getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
} }
timeg /= loops; timeg /= loops;

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef AXPY #undef AXPY
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x, *y; FLOAT *x, *y;
@ -127,8 +56,6 @@ int main(int argc, char *argv[]){
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timespec start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -175,13 +102,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
clock_gettime( CLOCK_REALTIME, &start); begin();
AXPY (&m, alpha, x, &inc_x, y, &inc_y ); AXPY (&m, alpha, x, &inc_x, y, &inc_y );
clock_gettime( CLOCK_REALTIME, &stop); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9; time1 = getsec();
timeg += time1; timeg += time1;

104
benchmark/bench.h Normal file
View File

@ -0,0 +1,104 @@
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
struct timeval start, stop;
#else
struct timespec start = { 0, 0 }, stop = { 0, 0 };
#endif
double getsec()
{
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
#else
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
#endif
}
void begin() {
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
gettimeofday( &start, (struct timezone *)0);
#else
clock_gettime(CLOCK_REALTIME, &start);
#endif
}
void end() {
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
gettimeofday( &stop, (struct timezone *)0);
#else
clock_gettime(CLOCK_REALTIME, &stop);
#endif
}

View File

@ -36,12 +36,7 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
double fabs(double); double fabs(double);
@ -71,41 +66,6 @@ double fabs(double);
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
static __inline double getmflops(int ratio, int m, double secs){ static __inline double getmflops(int ratio, int m, double secs){
double mm = (double)m; double mm = (double)m;
@ -145,7 +105,6 @@ int main(int argc, char *argv[]){
FLOAT maxerr; FLOAT maxerr;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -220,20 +179,19 @@ int main(int argc, char *argv[]){
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m); SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
gettimeofday( &start, (struct timezone *)0); begin();
POTRF(uplo[uplos], &m, b, &m, &info); POTRF(uplo[uplos], &m, b, &m, &info);
gettimeofday( &stop, (struct timezone *)0); end();
if (info != 0) { if (info != 0) {
fprintf(stderr, "Info = %d\n", info); fprintf(stderr, "Info = %d\n", info);
exit(1); exit(1);
} }
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
maxerr = 0.;
if (!(uplos & 1)) { if (!(uplos & 1)) {
for (j = 0; j < m; j++) { for (j = 0; j < m; j++) {

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef COPY #undef COPY
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x, *y; FLOAT *x, *y;
@ -128,11 +57,9 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1 = 0.0, timeg = 0.0; double time1 = 0.0, timeg = 0.0;
long nanos = 0; long nanos = 0;
time_t seconds = 0; time_t seconds = 0;
struct timespec time_start = { 0, 0 }, time_end = { 0, 0 };
argc--;argv++; argc--;argv++;
@ -176,15 +103,10 @@ int main(int argc, char *argv[]){
for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
clock_gettime(CLOCK_REALTIME, &time_start); begin();
COPY (&m, x, &inc_x, y, &inc_y ); COPY (&m, x, &inc_x, y, &inc_y );
clock_gettime(CLOCK_REALTIME, &time_end); end();
timeg += getsec();
nanos = time_end.tv_nsec - time_start.tv_nsec;
seconds = time_end.tv_sec - time_start.tv_sec;
time1 = seconds + nanos / 1.e9;
timeg += time1;
} }
timeg /= loops; timeg /= loops;

View File

@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef DOT #undef DOT
#ifdef DOUBLE #ifdef DOUBLE
#define DOT BLASFUNC(ddot) #define DOT BLASFUNC(ddot)
#else #else
#define DOT BLASFUNC(sdot) #define DOT BLASFUNC(sdot)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x, *y; FLOAT *x, *y;
@ -122,7 +49,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -169,15 +95,12 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
result = DOT (&m, x, &inc_x, y, &inc_y ); result = DOT (&m, x, &inc_x, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0); end();
timeg += getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
} }

View File

@ -36,13 +36,7 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef GEEV #undef GEEV
@ -74,71 +68,6 @@ extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a,
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info ); FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info );
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork; FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
@ -154,7 +83,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -223,7 +151,7 @@ int main(int argc, char *argv[]){
for(m = from; m <= to; m += step){ for(m = from; m <= to; m += step){
fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
gettimeofday( &start, (struct timezone *)0); begin();
lwork = -1; lwork = -1;
#ifndef COMPLEX #ifndef COMPLEX
@ -239,14 +167,14 @@ int main(int argc, char *argv[]){
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info); GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
#endif #endif
gettimeofday( &stop, (struct timezone *)0); end();
if (info) { if (info) {
fprintf(stderr, "failed to compute eigenvalues .. %d\n", info); fprintf(stderr, "failed to compute eigenvalues .. %d\n", info);
exit(1); exit(1);
} }
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops : %10.2f Sec : %d\n", " %10.2f MFlops : %10.2f Sec : %d\n",

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef GEMM #undef GEMM
@ -55,71 +49,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
IFLOAT *a, *b; IFLOAT *a, *b;
@ -139,7 +68,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1, timeg; double time1, timeg;
argc--;argv++; argc--;argv++;
@ -228,14 +156,14 @@ int main(int argc, char *argv[]){
ldc = m; ldc = m;
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k); fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
gettimeofday( &start, (struct timezone *)0); begin();
for (j=0; j<loops; j++) { for (j=0; j<loops; j++) {
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc); GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
} }
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg = time1/loops; timeg = time1/loops;
fprintf(stderr, fprintf(stderr,

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef GEMM #undef GEMM
@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b, *c; FLOAT *a, *b, *c;
@ -133,7 +62,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -187,16 +115,12 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
gettimeofday( &stop, (struct timezone *)0); end();
timeg += getsec();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
} }
timeg /= loops; timeg /= loops;

View File

@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef GEMV #undef GEMV
@ -52,72 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@ -137,7 +66,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -211,10 +139,10 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;
} }
@ -248,10 +176,10 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;
} }

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef GER #undef GER
@ -49,72 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@ -131,7 +59,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -198,16 +125,13 @@ int main(int argc, char *argv[]){
for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
gettimeofday( &start, (struct timezone *)0); begin();
GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m); GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; timeg += getsec();
timeg += time1;
} }
timeg /= loops; timeg /= loops;

View File

@ -36,12 +36,7 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
double fabs(double); double fabs(double);
@ -66,71 +61,6 @@ double fabs(double);
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b; FLOAT *a, *b;
@ -142,7 +72,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -194,22 +123,18 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
GESV (&m, &m, a, &m, ipiv, b, &m, &info); GESV (&m, &m, a, &m, ipiv, b, &m, &info);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
fprintf(stderr, fprintf(stderr,
"%10.2f MFlops %10.6f s\n", "%10.2f MFlops %10.6f s\n",
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1); COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
} }
return 0; return 0;

View File

@ -36,12 +36,7 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef GETRF #undef GETRF
#undef GETRI #undef GETRI
@ -72,71 +67,6 @@
extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info); extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info);
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a,*work; FLOAT *a,*work;
@ -148,7 +78,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -205,21 +134,21 @@ int main(int argc, char *argv[]){
exit(1); exit(1);
} }
gettimeofday( &start, (struct timezone *)0); begin();
lwork = -1; lwork = -1;
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info); GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
lwork = (blasint)wkopt[0]; lwork = (blasint)wkopt[0];
GETRI(&m, a, &m, ipiv, work, &lwork, &info); GETRI(&m, a, &m, ipiv, work, &lwork, &info);
gettimeofday( &stop, (struct timezone *)0); end();
if (info) { if (info) {
fprintf(stderr, "failed compute inverse matrix .. %d\n", info); fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
exit(1); exit(1);
} }
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops : %10.2f Sec : %d\n", " %10.2f MFlops : %10.2f Sec : %d\n",

View File

@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HBMV #undef HBMV
#ifdef DOUBLE #ifdef DOUBLE
#define HBMV BLASFUNC(zhbmv) #define HBMV BLASFUNC(zhbmv)
#else #else
#define HBMV BLASFUNC(chbmv) #define HBMV BLASFUNC(chbmv)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz) {
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size) {
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@ -125,7 +52,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -186,15 +112,13 @@ int main(int argc, char *argv[]){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; timeg += getsec();
timeg += time1;
} }

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HEMM #undef HEMM
@ -41,72 +35,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define HEMM BLASFUNC(chemm) #define HEMM BLASFUNC(chemm)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b, *c; FLOAT *a, *b, *c;
@ -126,7 +54,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -170,13 +97,13 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",

View File

@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HEMV #undef HEMV
#ifdef DOUBLE #ifdef DOUBLE
#define HEMV BLASFUNC(zhemv) #define HEMV BLASFUNC(zhemv)
#else #else
#define HEMV BLASFUNC(chemv) #define HEMV BLASFUNC(chemv)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@ -124,7 +51,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -182,13 +108,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HER #undef HER
#ifdef DOUBLE #ifdef DOUBLE
#define HER BLASFUNC(zher) #define HER BLASFUNC(zher)
#else #else
#define HER BLASFUNC(cher) #define HER BLASFUNC(cher)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x; FLOAT *a, *x;
@ -126,8 +53,6 @@ int main(int argc, char *argv[]){
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -166,15 +91,13 @@ int main(int argc, char *argv[]){
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
HER (&uplo, &m, alpha, x, &incx, a, &m ); HER (&uplo, &m, alpha, x, &incx, a, &m );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",

View File

@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HER2 #undef HER2
#ifdef DOUBLE #ifdef DOUBLE
#define HER2 BLASFUNC(zher2) #define HER2 BLASFUNC(zher2)
#else #else
#define HER2 BLASFUNC(cher2) #define HER2 BLASFUNC(cher2)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@ -127,7 +54,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -169,16 +95,13 @@ int main(int argc, char *argv[]){
y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m ); HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
gettimeofday( &start, (struct timezone *)0);
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HER2K #undef HER2K
#ifdef DOUBLE #ifdef DOUBLE
@ -40,72 +34,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define HER2K BLASFUNC(cher2k) #define HER2K BLASFUNC(cher2k)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b, *c; FLOAT *a, *b, *c;
@ -125,7 +53,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -169,13 +96,13 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",

View File

@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HERK #undef HERK
#ifdef DOUBLE #ifdef DOUBLE
#define HERK BLASFUNC(zherk) #define HERK BLASFUNC(zherk)
#else #else
#define HERK BLASFUNC(cherk) #define HERK BLASFUNC(cherk)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *c; FLOAT *a, *c;
@ -127,7 +54,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -167,18 +93,17 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
} }
return 0; return 0;

View File

@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef HPMV #undef HPMV
#ifdef DOUBLE #ifdef DOUBLE
#define HPMV BLASFUNC(zhpmv) #define HPMV BLASFUNC(zhpmv)
#else #else
#define HPMV BLASFUNC(chpmv) #define HPMV BLASFUNC(chpmv)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz) {
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size) {
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@ -124,7 +51,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -183,13 +109,13 @@ int main(int argc, char *argv[]){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y ); HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef IAMAX #undef IAMAX
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
@ -127,7 +56,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -166,13 +94,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
IAMAX (&m, x, &inc_x); IAMAX (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef IAMIN #undef IAMIN
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
@ -127,7 +56,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -166,13 +94,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
IAMIN (&m, x, &inc_x); IAMIN (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef IMAX #undef IMAX
@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
IMAX (&m, x, &inc_x); IMAX (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef IMIN #undef IMIN
@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
IMIN (&m, x, &inc_x); IMIN (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -36,12 +36,7 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
double fabs(double); double fabs(double);
@ -72,71 +67,6 @@ double fabs(double);
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b; FLOAT *a, *b;
@ -151,7 +81,6 @@ int main(int argc, char *argv[]){
FLOAT maxerr; FLOAT maxerr;
struct timeval start, stop;
double time1, time2; double time1, time2;
argc--;argv++; argc--;argv++;
@ -198,31 +127,31 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
GETRF (&m, &m, a, &m, ipiv, &info); GETRF (&m, &m, a, &m, ipiv, &info);
gettimeofday( &stop, (struct timezone *)0); end();
if (info) { if (info) {
fprintf(stderr, "Matrix is not singular .. %d\n", info); fprintf(stderr, "Matrix is not singular .. %d\n", info);
exit(1); exit(1);
} }
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
gettimeofday( &start, (struct timezone *)0); begin();
GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info); GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info);
gettimeofday( &stop, (struct timezone *)0); end();
if (info) { if (info) {
fprintf(stderr, "Matrix is not singular .. %d\n", info); fprintf(stderr, "Matrix is not singular .. %d\n", info);
exit(1); exit(1);
} }
time2 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time2 = getsec();
maxerr = 0.; maxerr = 0.;

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef NAMAX #undef NAMAX
@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
NAMAX (&m, x, &inc_x); NAMAX (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef NAMIN #undef NAMIN
@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
NAMIN (&m, x, &inc_x); NAMIN (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef NRM2 #undef NRM2
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
@ -127,7 +56,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -166,13 +94,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
NRM2 (&m, x, &inc_x); NRM2 (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -36,12 +36,7 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
double fabs(double); double fabs(double);
@ -86,37 +81,7 @@ double fabs(double);
// extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info); // extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info);
// extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info); // extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info);
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
@ -141,7 +106,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -217,18 +181,18 @@ int main(int argc, char *argv[]){
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m); SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
gettimeofday( &start, (struct timezone *)0); begin();
POTRF(uplo[uplos], &m, b, &m, &info); POTRF(uplo[uplos], &m, b, &m, &info);
gettimeofday( &stop, (struct timezone *)0); end();
if (info != 0) { if (info != 0) {
fprintf(stderr, "Potrf info = %d\n", info); fprintf(stderr, "Potrf info = %d\n", info);
exit(1); exit(1);
} }
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6; flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
if ( btest == 'S' ) if ( btest == 'S' )
@ -240,17 +204,17 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info); POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info);
gettimeofday( &stop, (struct timezone *)0); end();
if (info != 0) { if (info != 0) {
fprintf(stderr, "Potrs info = %d\n", info); fprintf(stderr, "Potrs info = %d\n", info);
exit(1); exit(1);
} }
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6; flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
} }
@ -258,18 +222,18 @@ int main(int argc, char *argv[]){
if ( btest == 'I' ) if ( btest == 'I' )
{ {
gettimeofday( &start, (struct timezone *)0); begin();
POTRI(uplo[uplos], &m, b, &m, &info); POTRI(uplo[uplos], &m, b, &m, &info);
gettimeofday( &stop, (struct timezone *)0); end();
if (info != 0) { if (info != 0) {
fprintf(stderr, "Potri info = %d\n", info); fprintf(stderr, "Potri info = %d\n", info);
exit(1); exit(1);
} }
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6; flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
} }

View File

@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef ROT #undef ROT
@ -52,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x, *y; FLOAT *x, *y;
@ -133,7 +63,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -179,13 +108,13 @@ int main(int argc, char *argv[]){
for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
gettimeofday( &start, (struct timezone *)0); begin();
ROT (&m, x, &inc_x, y, &inc_y, c, s); ROT (&m, x, &inc_x, y, &inc_y, c, s);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -25,12 +25,7 @@ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef ROTM #undef ROTM
@ -40,72 +35,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ROTM BLASFUNC(srotm) #define ROTM BLASFUNC(srotm)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz)
{
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv) {
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size)
{
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =
shmget(IPC_PRIVATE, (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT | 0600)) < 0) {
printf("Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1) {
printf("Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
@ -122,7 +51,7 @@ int main(int argc, char *argv[])
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1, timeg; double time1, timeg;
argc--; argc--;
@ -188,14 +117,13 @@ int main(int argc, char *argv[])
} }
for (l = 0; l < loops; l++) { for (l = 0; l < loops; l++) {
gettimeofday(&start, (struct timezone *)0); begin();
ROTM(&m, x, &inc_x, y, &inc_y, param); ROTM(&m, x, &inc_x, y, &inc_y, param);
gettimeofday(&stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + time1 = getsec();
(double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1; timeg += time1;
} }

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SCAL #undef SCAL
@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x, *y; FLOAT *x, *y;
@ -128,7 +57,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -174,13 +102,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
SCAL (&m, alpha, x, &inc_x); SCAL (&m, alpha, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -25,17 +25,10 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SPMV #undef SPMV
#ifndef COMPLEX #ifndef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
@ -54,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@ -135,7 +63,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -193,13 +120,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y ); SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SPR #undef SPR
@ -41,73 +35,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SPR BLASFUNC(sspr) #define SPR BLASFUNC(sspr)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a,*c; FLOAT *a,*c;
@ -129,7 +56,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -173,13 +99,13 @@ int main(int argc, char *argv[]){
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
SPR (&uplo, &m, alpha, c, &inc_x, a); SPR (&uplo, &m, alpha, c, &inc_x, a);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;
} }

View File

@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SPR2 #undef SPR2
@ -42,72 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a,*b,*c; FLOAT *a,*b,*c;
@ -129,7 +58,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -182,13 +110,13 @@ int main(int argc, char *argv[]){
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a); SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;
} }

View File

@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SWAP #undef SWAP
@ -49,71 +44,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x, *y; FLOAT *x, *y;
@ -128,7 +58,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -175,13 +104,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
SWAP (&m, x, &inc_x, y, &inc_y ); SWAP (&m, x, &inc_x, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SYMM #undef SYMM
@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b, *c; FLOAT *a, *b, *c;
@ -137,7 +66,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -181,13 +109,13 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SYMV #undef SYMV
@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@ -134,7 +63,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -192,13 +120,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
SYMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); SYMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SYR #undef SYR
@ -42,72 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x,*a; FLOAT *x,*a;
@ -124,7 +53,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -165,13 +93,13 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
SYR (&uplo, &m, alpha, x, &inc_x, a, &m ); SYR (&uplo, &m, alpha, x, &inc_x, a, &m );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SYR2 #undef SYR2
@ -42,72 +36,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYR2 BLASFUNC(ssyr2) #define SYR2 BLASFUNC(ssyr2)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x, *y, *a; FLOAT *x, *y, *a;
@ -125,7 +53,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -174,13 +101,13 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m ); SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",

View File

@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SYR2K #undef SYR2K
@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b, *c; FLOAT *a, *b, *c;
@ -137,7 +67,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -181,13 +110,13 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef SYRK #undef SYRK
@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *c; FLOAT *a, *c;
@ -137,7 +66,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -177,13 +105,13 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",

View File

@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef TPMV #undef TPMV
@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size)
{
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1) {
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
@ -112,7 +73,6 @@ int main(int argc, char *argv[])
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timespec start = { 0, 0 }, stop = { 0, 0 };
double time1, timeg; double time1, timeg;
argc--;argv++; argc--;argv++;
@ -153,11 +113,11 @@ int main(int argc, char *argv[])
} }
for (l = 0; l < loops; l++) { for (l = 0; l < loops; l++) {
clock_gettime(CLOCK_REALTIME, &start); begin();
TPMV (&uplo, &trans, &diag, &n, a, x, &inc_x); TPMV (&uplo, &trans, &diag, &n, a, x, &inc_x);
clock_gettime(CLOCK_REALTIME, &stop); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9; time1 = getsec();
timeg += time1; timeg += time1;
} }

View File

@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef TPSV #undef TPSV
@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size)
{
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1) {
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
@ -112,7 +73,6 @@ int main(int argc, char *argv[])
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timespec start = { 0, 0 }, stop = { 0, 0 };
double time1, timeg; double time1, timeg;
argc--;argv++; argc--;argv++;
@ -153,11 +113,11 @@ int main(int argc, char *argv[])
} }
for (l = 0; l < loops; l++) { for (l = 0; l < loops; l++) {
clock_gettime(CLOCK_REALTIME, &start); begin();
TPSV (&uplo, &trans, &diag, &n, a, x, &inc_x); TPSV (&uplo, &trans, &diag, &n, a, x, &inc_x);
clock_gettime(CLOCK_REALTIME, &stop); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9; time1 = getsec();
timeg += time1; timeg += time1;
} }

View File

@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef TRMM #undef TRMM
@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b; FLOAT *a, *b;
@ -141,7 +71,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -180,13 +109,13 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n", " %10.2f MFlops %10.6f sec\n",

View File

@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef TRMV #undef TRMV
@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size)
{
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1) {
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
@ -112,7 +73,6 @@ int main(int argc, char *argv[])
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timespec start = { 0, 0 }, stop = { 0, 0 };
double time1, timeg; double time1, timeg;
argc--;argv++; argc--;argv++;
@ -153,11 +113,11 @@ int main(int argc, char *argv[])
} }
for (l = 0; l < loops; l++) { for (l = 0; l < loops; l++) {
clock_gettime(CLOCK_REALTIME, &start); begin();
TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x); TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x);
clock_gettime(CLOCK_REALTIME, &stop); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9; time1 = getsec();
timeg += time1; timeg += time1;
} }

View File

@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef TRSM #undef TRSM
@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *b; FLOAT *a, *b;
@ -151,7 +81,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;
argc--;argv++; argc--;argv++;
@ -196,13 +125,13 @@ int main(int argc, char *argv[]){
} }
} }
gettimeofday( &start, (struct timezone *)0); begin();
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;
} }

View File

@ -25,14 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include <time.h>
#include "common.h"
#undef GEMV #undef GEMV
#undef TRSV #undef TRSV
@ -55,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x; FLOAT *a, *x;
@ -133,7 +61,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timespec time_start, time_end;
time_t seconds = 0; time_t seconds = 0;
double time1,timeg; double time1,timeg;
@ -189,19 +116,13 @@ int main(int argc, char *argv[]){
for(l =0;l< loops;l++){ for(l =0;l< loops;l++){
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_start); begin();
TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x); TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x);
end();
clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_end); time1 = getsec();
nanos = time_end.tv_nsec - time_start.tv_nsec;
seconds = time_end.tv_sec - time_start.tv_sec;
time1 = seconds + nanos /1.e9;
timeg += time1; timeg += time1;
} }
timeg /= loops; timeg /= loops;
long long muls = n*(n+1)/2.0; long long muls = n*(n+1)/2.0;
long long adds = (n - 1.0)*n/2.0; long long adds = (n - 1.0)*n/2.0;

View File

@ -25,90 +25,18 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#define RETURN_BY_STACK 1
#include "common.h"
#define RETURN_BY_STACK 1
#undef DOT #undef DOT
#ifdef DOUBLE #ifdef DOUBLE
#define DOT BLASFUNC(zdotu) #define DOT BLASFUNC(zdotu)
#else #else
#define DOT BLASFUNC(cdotu) #define DOT BLASFUNC(cdotu)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x, *y; FLOAT *x, *y;
@ -123,7 +51,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -170,13 +97,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
DOT (&result, &m, x, &inc_x, y, &inc_y ); DOT (&result, &m, x, &inc_x, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h> #include "bench.h"
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#undef DOT #undef DOT
@ -42,72 +36,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define DOT BLASFUNC(cdotu) #define DOT BLASFUNC(cdotu)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x, *y; FLOAT *x, *y;
@ -122,7 +50,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@ -169,15 +96,15 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0); begin();
#ifdef RETURN_BY_STACK #ifdef RETURN_BY_STACK
DOT (&result , &m, x, &inc_x, y, &inc_y ); DOT (&result , &m, x, &inc_x, y, &inc_y );
#else #else
result = DOT (&m, x, &inc_x, y, &inc_y ); result = DOT (&m, x, &inc_x, y, &inc_y );
#endif #endif
gettimeofday( &stop, (struct timezone *)0); end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; time1 = getsec();
timeg += time1; timeg += time1;

View File

@ -393,6 +393,7 @@ void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPE
void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout); void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout);
/* dot production of BFLOAT16 input arrays, and output as float */ /* dot production of BFLOAT16 input arrays, and output as float */
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy); float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
void cblas_sbgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -49,6 +49,7 @@ if (DYNAMIC_ARCH)
if (POWER) if (POWER)
set(DYNAMIC_CORE POWER6 POWER8 POWER9 POWER10) set(DYNAMIC_CORE POWER6 POWER8 POWER9 POWER10)
set(CCOMMON_OPT "${CCOMMON_OPT} -DHAVE_P10_SUPPORT")
endif () endif ()
if (X86) if (X86)

View File

@ -96,7 +96,7 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN")
endif () endif ()
endif () endif ()
if (${CORE} STREQUAL "SKYLAKEX") if (${CORE} STREQUAL SKYLAKEX)
if (NOT DYNAMIC_ARCH) if (NOT DYNAMIC_ARCH)
if (NOT NO_AVX512) if (NOT NO_AVX512)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512") set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
@ -104,7 +104,7 @@ if (${CORE} STREQUAL "SKYLAKEX")
endif () endif ()
endif () endif ()
if (${CORE} STREQUAL "COOPERLAKE") if (${CORE} STREQUAL COOPERLAKE)
if (NOT DYNAMIC_ARCH) if (NOT DYNAMIC_ARCH)
if (NOT NO_AVX512) if (NOT NO_AVX512)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)

View File

@ -184,8 +184,8 @@ macro(SetDefaultL2)
set(XHEMV_V_KERNEL ../generic/zhemv_k.c) set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
set(XHEMV_M_KERNEL ../generic/zhemv_k.c) set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
if (BUILD_BFLOAT16) if (BUILD_BFLOAT16)
set(SBGEMVNKERNEL ../arm/gemv_n.c) set(SBGEMVNKERNEL ../x86_64/sbgemv_n.c)
set(SBGEMVTKERNEL ../arm/gemv_t.c) set(SBGEMVTKERNEL ../x86_64/sbgemv_t.c)
set(SHGERKERNEL ../generic/ger.c) set(SHGERKERNEL ../generic/ger.c)
endif () endif ()
endmacro () endmacro ()

View File

@ -84,6 +84,14 @@ if (X86)
set(NO_EXPRECISION 1) set(NO_EXPRECISION 1)
endif () endif ()
if (DYNAMIC_ARCH)
if (TARGET)
if (${TARGET} STREQUAL "GENERIC")
set(NO_EXPRECISION 1)
endif ()
endif ()
endif ()
if (UTEST_CHECK) if (UTEST_CHECK)
set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK") set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
set(SANITY_CHECK 1) set(SANITY_CHECK 1)

View File

@ -416,6 +416,29 @@ endif ()
set(ZGEMM_UNROLL_M 4) set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4) set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16) set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "VORTEX")
file(APPEND ${TARGET_CONF_TEMP}
"#define ARMV8\n"
"#define L1_CODE_SIZE\t32768\n"
"#define L1_CODE_LINESIZE\t64\n"
"#define L1_CODE_ASSOCIATIVE\t4\n"
"#define L1_DATA_SIZE\t32768\n"
"#define L1_DATA_LINESIZE\t64\n"
"#define L1_DATA_ASSOCIATIVE\t4\n"
"#define L2_SIZE\t5262144\n"
"#define L2_LINESIZE\t64\n"
"#define L2_ASSOCIATIVE\t8\n"
"#define DTB_DEFAULT_ENTRIES\t64\n"
"#define DTB_SIZE\t4096\n")
set(SGEMM_UNROLL_M 16)
set(SGEMM_UNROLL_N 4)
set(DGEMM_UNROLL_M 8)
set(DGEMM_UNROLL_N 4)
set(CGEMM_UNROLL_M 8)
set(CGEMM_UNROLL_N 4)
set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "POWER6") elseif ("${TCORE}" STREQUAL "POWER6")
file(APPEND ${TARGET_CONF_TEMP} file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE 32768\n" "#define L1_DATA_SIZE 32768\n"
@ -533,6 +556,21 @@ else(NOT CMAKE_CROSSCOMPILING)
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}") MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
endif () endif ()
endif () endif ()
unset (HAVE_AVX2)
unset (HAVE_AVX)
unset (HAVE_FMA3)
unset (HAVE_MMX)
unset (HAVE_SSE)
unset (HAVE_SSE2)
unset (HAVE_SSE3)
unset (HAVE_SSSE3)
unset (HAVE_SSE4A)
unset (HAVE_SSE4_1)
unset (HAVE_SSE4_2)
unset (HAVE_NEON)
unset (HAVE_VFP)
unset (HAVE_VFPV3)
unset (HAVE_VFPV4)
message(STATUS "Running getarch") message(STATUS "Running getarch")
# use the cmake binary w/ the -E param to run a shell command in a cross-platform way # use the cmake binary w/ the -E param to run a shell command in a cross-platform way

View File

@ -44,50 +44,9 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
endif () endif ()
endif () endif ()
if (DEFINED TARGET)
if (${TARGET} STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
else()
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
# endif()
endif()
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
endif()
if (DEFINED HAVE_SSE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
endif()
if (DEFINED HAVE_SSE2)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
endif()
if (DEFINED HAVE_SSE3)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
endif()
if (DEFINED HAVE_SSSE3)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
endif()
if (DEFINED HAVE_SSE4_1)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
endif()
endif()
if (DEFINED TARGET) if (DEFINED TARGET)
message(STATUS "-- -- -- -- -- -- -- -- -- -- -- -- --")
message(STATUS "Targeting the ${TARGET} architecture.") message(STATUS "Targeting the ${TARGET} architecture.")
set(GETARCH_FLAGS "-DFORCE_${TARGET}") set(GETARCH_FLAGS "-DFORCE_${TARGET}")
endif () endif ()
@ -187,6 +146,63 @@ else()
endif () endif ()
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
if (DEFINED TARGET)
if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512)
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
else()
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
# endif()
endif()
if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2)
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
endif()
if (DEFINED HAVE_AVX)
if (NOT NO_AVX)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx")
endif()
endif()
if (DEFINED HAVE_AVX2)
if (NOT NO_AVX2)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
endif()
if (DEFINED HAVE_FMA3)
if (NOT NO_AVX2)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mfma")
endif()
endif()
if (DEFINED HAVE_SSE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
endif()
if (DEFINED HAVE_SSE2)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
endif()
if (DEFINED HAVE_SSE3)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
endif()
if (DEFINED HAVE_SSSE3)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
endif()
if (DEFINED HAVE_SSE4_1)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
endif()
endif()
if (DEFINED BINARY) if (DEFINED BINARY)
message(STATUS "Compiling a ${BINARY}-bit binary.") message(STATUS "Compiling a ${BINARY}-bit binary.")
endif () endif ()

View File

@ -142,14 +142,8 @@ REALNAME:
#define HUGE_PAGESIZE ( 4 << 20) #define HUGE_PAGESIZE ( 4 << 20)
#ifndef BUFFERSIZE #ifndef BUFFERSIZE
#if defined(CORTEXA57)
#define BUFFER_SIZE (20 << 20)
#elif defined(TSV110) || defined(EMAG8180)
#define BUFFER_SIZE (32 << 20) #define BUFFER_SIZE (32 << 20)
#else #else
#define BUFFER_SIZE (16 << 20)
#endif
#else
#define BUFFER_SIZE (32 << BUFFERSIZE) #define BUFFER_SIZE (32 << BUFFERSIZE)
#endif #endif

View File

@ -250,6 +250,8 @@ void BLASFUNC(xgeru)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *, void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
xdouble *, blasint *, xdouble *, blasint *); xdouble *, blasint *, xdouble *, blasint *);
void BLASFUNC(sbgemv)(char *, blasint *, blasint *, float *, bfloat16 *, blasint *,
bfloat16 *, blasint *, float *, float *, blasint *);
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *, void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
float *, blasint *, float *, float *, blasint *); float *, blasint *, float *, float *, blasint *);
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *, void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *,

View File

@ -44,6 +44,10 @@
extern "C" { extern "C" {
#endif #endif
int sbgemv_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
int sbgemv_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
int sbgemv_thread_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
int sbgemv_thread_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
int sger_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int sger_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int dger_k (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int dger_k (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int qger_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *); int qger_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);

View File

@ -646,10 +646,12 @@
#elif defined(BFLOAT16) #elif defined(BFLOAT16)
#define D_TO_BF16_K SBDTOBF16_K #define D_TO_BF16_K SBDTOBF16_K
#define D_BF16_TO_K DBF16TOD_K #define D_BF16_TO_K DBF16TOD_K
#define S_TO_BF16_K SBSTOBF16_K #define S_TO_BF16_K SBSTOBF16_K
#define S_BF16_TO_K SBF16TOS_K #define S_BF16_TO_K SBF16TOS_K
#define SBGEMV_N SBGEMV_N_K
#define SBGEMV_T SBGEMV_T_K
#define AMAX_K SAMAX_K #define AMAX_K SAMAX_K
#define AMIN_K SAMIN_K #define AMIN_K SAMIN_K

View File

@ -78,8 +78,8 @@ BLASLONG (*isbmin_k) (BLASLONG, float *, BLASLONG);
int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*sbgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sbgemv_n) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
int (*sbgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sbgemv_t) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*sbsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sbsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);

View File

@ -844,8 +844,8 @@ Lmcount$lazy_ptr:
#define BUFFER_SIZE ( 2 << 20) #define BUFFER_SIZE ( 2 << 20)
#elif defined(PPC440FP2) #elif defined(PPC440FP2)
#define BUFFER_SIZE ( 16 << 20) #define BUFFER_SIZE ( 16 << 20)
#elif defined(POWER8) || defined(POWER9) || defined(POWER10) #elif defined(POWER6) || defined(POWER8) || defined(POWER9) || defined(POWER10)
#define BUFFER_SIZE ( 64 << 20) #define BUFFER_SIZE ( 64 << 22)
#else #else
#define BUFFER_SIZE ( 16 << 20) #define BUFFER_SIZE ( 16 << 20)
#endif #endif

View File

@ -8,6 +8,8 @@
#define SBDTOBF16_K sbdtobf16_k #define SBDTOBF16_K sbdtobf16_k
#define SBF16TOS_K sbf16tos_k #define SBF16TOS_K sbf16tos_k
#define DBF16TOD_K dbf16tod_k #define DBF16TOD_K dbf16tod_k
#define SBGEMV_N_K sbgemv_n
#define SBGEMV_T_K sbgemv_t
#define SBGEMM_ONCOPY sbgemm_oncopy #define SBGEMM_ONCOPY sbgemm_oncopy
#define SBGEMM_OTCOPY sbgemm_otcopy #define SBGEMM_OTCOPY sbgemm_otcopy
@ -29,6 +31,8 @@
#define SBDTOBF16_K gotoblas -> sbdtobf16_k #define SBDTOBF16_K gotoblas -> sbdtobf16_k
#define SBF16TOS_K gotoblas -> sbf16tos_k #define SBF16TOS_K gotoblas -> sbf16tos_k
#define DBF16TOD_K gotoblas -> dbf16tod_k #define DBF16TOD_K gotoblas -> dbf16tod_k
#define SBGEMV_N_K gotoblas -> sbgemv_n
#define SBGEMV_T_K gotoblas -> sbgemv_t
#define SBGEMM_ONCOPY gotoblas -> sbgemm_oncopy #define SBGEMM_ONCOPY gotoblas -> sbgemm_oncopy
#define SBGEMM_OTCOPY gotoblas -> sbgemm_otcopy #define SBGEMM_OTCOPY gotoblas -> sbgemm_otcopy

View File

@ -424,7 +424,7 @@ void get_cpuconfig(void)
sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0); sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0);
printf("#define L1_DATA_SIZE %d \n",value); printf("#define L1_DATA_SIZE %d \n",value);
sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0); sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0);
printf("#define L2_DATA_SIZE %d \n",value); printf("#define L2_SIZE %d \n",value);
break; break;
#endif #endif
} }

View File

@ -202,7 +202,7 @@ int support_avx(){
if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){ if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){
xgetbv(0, &eax, &edx); xgetbv(0, &eax, &edx);
if((eax & 6) == 6){ if((eax & 6) == 6){
ret=1; //OS support AVX ret=1; //OS supports saving xmm and ymm registers (6 = (1<<1) | (1<<2))
} }
} }
return ret; return ret;
@ -219,8 +219,8 @@ int support_avx2(){
if (!support_avx()) if (!support_avx())
return 0; return 0;
cpuid(7, &eax, &ebx, &ecx, &edx); cpuid(7, &eax, &ebx, &ecx, &edx);
if((ebx & (1<<7)) != 0) if((ebx & (1<<5)) != 0)
ret=1; //OS supports AVX2 ret=1; //CPU supports AVX2
return ret; return ret;
#else #else
return 0; return 0;
@ -235,14 +235,14 @@ int support_avx512(){
if (!support_avx()) if (!support_avx())
return 0; return 0;
cpuid(7, &eax, &ebx, &ecx, &edx); cpuid(7, &eax, &ebx, &ecx, &edx);
if((ebx & 32) != 32){ if((ebx & (1<<5)) == 0){
ret=0; //OS does not even support AVX2 ret=0; //cpu does not have avx2 flag
} }
if((ebx & (1<<31)) != 0){ if((ebx & (1<<31)) != 0){ //AVX512VL flag
xgetbv(0, &eax, &edx); xgetbv(0, &eax, &edx);
if((eax & 0xe0) == 0xe0) if((eax & 0xe0) == 0xe0)
ret=1; //OS supports AVX512VL ret=1; //OS supports saving zmm registers
} }
return ret; return ret;
#else #else
return 0; return 0;

View File

@ -413,7 +413,13 @@ XBLASOBJS += \
xtbmv_thread_RUU.$(SUFFIX) xtbmv_thread_RUN.$(SUFFIX) \ xtbmv_thread_RUU.$(SUFFIX) xtbmv_thread_RUN.$(SUFFIX) \
xtbmv_thread_RLU.$(SUFFIX) xtbmv_thread_RLN.$(SUFFIX) \ xtbmv_thread_RLU.$(SUFFIX) xtbmv_thread_RLN.$(SUFFIX) \
xtbmv_thread_CUU.$(SUFFIX) xtbmv_thread_CUN.$(SUFFIX) \ xtbmv_thread_CUU.$(SUFFIX) xtbmv_thread_CUN.$(SUFFIX) \
xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX) \ xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX)
ifeq ($(BUILD_BFLOAT16),1)
SBBLASOBJS += \
sbgemv_thread_n$(TSUFFIX).$(SUFFIX) \
sbgemv_thread_t$(TSUFFIX).$(SUFFIX)
endif
endif endif
@ -3693,4 +3699,12 @@ xtrsv_CUU.$(SUFFIX) xtrsv_CUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F) $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
ifeq ($(BUILD_BFLOAT16),1)
sbgemv_thread_n.$(SUFFIX) sbgemv_thread_n.$(PSUFFIX) : sbgemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
sbgemv_thread_t.$(SUFFIX) sbgemv_thread_t.$(PSUFFIX) : sbgemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
endif
include ../../Makefile.tail include ../../Makefile.tail

View File

@ -0,0 +1,149 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include "common.h"
#ifndef TRANSA
#define SBGEMV SBGEMV_N
#else
#define SBGEMV SBGEMV_T
#endif
static int sbgemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *dummy2, BLASLONG dummy3){
bfloat16 *a, *x;
float *y;
BLASLONG lda, incx, incy;
BLASLONG m_from, m_to, n_from, n_to;
a = (bfloat16 *)args->a;
x = (bfloat16 *)args->b;
y = (float *)args->c;
lda = args->lda;
incx = args->ldb;
incy = args->ldc;
#ifndef TRANSA // N
m_from = *(range_m + 0);
m_to = *(range_m + 1);
n_from = 0;
n_to = args -> n;
a += m_from;
y += m_from * incy;
#else // T
m_from = 0;
m_to = args->m;
n_from = *(range_n + 0);
n_to = *(range_n + 1);
a += n_from * lda;
y += n_from * incy;
#endif
SBGEMV(m_to - m_from, n_to - n_from, *((FLOAT *)(args->alpha)), a, lda, x, incx, *((FLOAT *)(args->beta)), y, incy);
return 0;
}
int CNAME(BLASLONG m, BLASLONG n, float alpha, bfloat16 *a, BLASLONG lda, bfloat16 *x, BLASLONG incx, float beta, float *y, BLASLONG incy, int threads)
{
blas_arg_t args;
blas_queue_t queue[MAX_CPU_NUMBER];
BLASLONG range[MAX_CPU_NUMBER + 1];
#ifndef TRANSA
BLASLONG width_for_split = m;
#else
BLASLONG width_for_split = n;
#endif
BLASLONG BLOCK_WIDTH = width_for_split/threads;
int mode = BLAS_BFLOAT16 | BLAS_REAL;
args.m = m;
args.n = n;
args.a = (void *)a;
args.b = (void *)x;
args.c = (void *)y;
args.lda = lda;
args.ldb = incx;
args.ldc = incy;
args.alpha = (void *)&alpha;
args.beta = (void *)&beta;
range[0] = 0;
int thread_idx;
for (thread_idx=0; thread_idx<threads; thread_idx++) {
if (thread_idx != threads-1) {
range[thread_idx + 1] = range[thread_idx] + BLOCK_WIDTH;
} else {
range[thread_idx + 1] = range[thread_idx] + width_for_split;
}
queue[thread_idx].mode = mode;
queue[thread_idx].routine = sbgemv_kernel;
queue[thread_idx].args = &args;
#ifndef TRANSA
queue[thread_idx].range_m = &range[thread_idx];
queue[thread_idx].range_n = NULL;
#else
queue[thread_idx].range_m = NULL;
queue[thread_idx].range_n = &range[thread_idx];
#endif
queue[thread_idx].sa = NULL;
queue[thread_idx].sb = NULL;
queue[thread_idx].next = &queue[thread_idx + 1];
width_for_split -= BLOCK_WIDTH;
}
if (thread_idx) {
queue[0].sa = NULL;
queue[0].sb = NULL;
queue[thread_idx - 1].next = NULL;
exec_blas(thread_idx, queue);
}
return 0;
}

View File

@ -80,7 +80,7 @@ int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha
break; break;
} }
mode |= BLAS_LEGACY; if(!(mode & BLAS_PTHREAD)) mode |= BLAS_LEGACY;
for (i = 0; i < nthreads; i++) blas_queue_init(&queue[i]); for (i = 0; i < nthreads; i++) blas_queue_init(&queue[i]);

View File

@ -352,7 +352,6 @@ fprintf(stderr,"UNHANDLED COMPLEX\n");
/* Other types in future */ /* Other types in future */
} }
} }
if (!sb) fprintf(stderr,"SB not declared!!!\n");
queue->sb=sb; queue->sb=sb;
} }
} }

View File

@ -476,12 +476,15 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
routine = queue -> routine; routine = queue -> routine;
if (!(queue -> mode & BLAS_LEGACY)) { if (queue -> mode & BLAS_LEGACY) {
legacy_exec(routine, queue -> mode, queue -> args, queue -> sb);
} else
if (queue -> mode & BLAS_PTHREAD) {
void (*pthreadcompat)(void *) = queue -> routine;
(pthreadcompat)(queue -> args);
} else
(routine)(queue -> args, queue -> range_m, queue -> range_n, (routine)(queue -> args, queue -> range_m, queue -> range_n,
queue -> sa, queue -> sb, 0); queue -> sa, queue -> sb, 0);
} else {
legacy_exec(routine, queue -> mode, queue -> args, queue -> sb);
}
if ((num > 1) && queue -> next) exec_blas_async_wait(num - 1, queue -> next); if ((num > 1) && queue -> next) exec_blas_async_wait(num - 1, queue -> next);

View File

@ -330,8 +330,8 @@ int support_avx2(){
if (!support_avx()) if (!support_avx())
return 0; return 0;
cpuid(7, &eax, &ebx, &ecx, &edx); cpuid(7, &eax, &ebx, &ecx, &edx);
if((ebx & (1<<7)) != 0) if((ebx & (1<<5)) != 0)
ret=1; //OS supports AVX2 ret=1; //AVX2 flag is set
return ret; return ret;
#else #else
return 0; return 0;
@ -346,13 +346,13 @@ int support_avx512(){
if (!support_avx()) if (!support_avx())
return 0; return 0;
cpuid(7, &eax, &ebx, &ecx, &edx); cpuid(7, &eax, &ebx, &ecx, &edx);
if((ebx & (1<<7)) == 0){ if((ebx & (1<<5)) == 0){
ret=0; //OS does not even support AVX2 ret=0; //cpu does not have avx2 flag
} }
if((ebx & (1u<<31)) != 0){ if((ebx & (1<<31)) != 0){ //AVX512VL flag is set
xgetbv(0, &eax, &edx); xgetbv(0, &eax, &edx);
if((eax & 0xe0) == 0xe0) if((eax & 0xe0) == 0xe0)
ret=1; //OS supports AVX512VL ret=1; //OS supports saving zmm register
} }
return ret; return ret;
#else #else

View File

@ -139,19 +139,30 @@ static gotoblas_t *force_coretype(char *coretype) {
static gotoblas_t *get_coretype(void) { static gotoblas_t *get_coretype(void) {
int implementer, variant, part, arch, revision, midr_el1; int implementer, variant, part, arch, revision, midr_el1;
char coremsg[128];
#if (!defined OS_LINUX && !defined OS_ANDROID)
return NULL;
#endif
#if (defined OS_LINUX || defined OS_ANDROID)
if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) { if (!(getauxval(AT_HWCAP) & HWCAP_CPUID)) {
char coremsg[128]; #ifdef __linux
FILE *infile;
char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL;
p = (char *) NULL ;
infile = fopen("/sys/devices/system/cpu/cpu0/regs/identification/midr_el1","r");
if (!infile) return NULL;
fgets(buffer, sizeof(buffer), infile);
midr_el1=strtoul(buffer,NULL,16);
fclose(infile);
#else
snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n"); snprintf(coremsg, 128, "Kernel lacks cpuid feature support. Auto detection of core type failed !!!\n");
openblas_warning(1, coremsg); openblas_warning(1, coremsg);
return NULL; return NULL;
}
#else
return NULL;
#endif #endif
} else {
get_cpu_ftr(MIDR_EL1, midr_el1); get_cpu_ftr(MIDR_EL1, midr_el1);
}
/* /*
* MIDR_EL1 * MIDR_EL1
* *
@ -219,6 +230,9 @@ static gotoblas_t *get_coretype(void) {
return &gotoblas_FALKOR; return &gotoblas_FALKOR;
} }
break; break;
default:
snprintf(coremsg, 128, "Unknown CPU model - implementer %x part %x\n",implementer,part);
openblas_warning(1, coremsg);
} }
return NULL; return NULL;
} }

View File

@ -6,10 +6,10 @@ extern gotoblas_t gotoblas_POWER8;
#if (!defined __GNUC__) || ( __GNUC__ >= 6) #if (!defined __GNUC__) || ( __GNUC__ >= 6)
extern gotoblas_t gotoblas_POWER9; extern gotoblas_t gotoblas_POWER9;
#endif #endif
#if (!defined __GNUC__) || ( __GNUC__ >= 11) \ //#if (!defined __GNUC__) || ( __GNUC__ >= 11) \
|| (__GNUC__ == 10 && __GNUC_MINOR__ >= 2) // || (__GNUC__ == 10 && __GNUC_MINOR__ >= 2)
#define HAVE_P10_SUPPORT 1 //#define HAVE_P10_SUPPORT 1
#endif //#endif
#ifdef HAVE_P10_SUPPORT #ifdef HAVE_P10_SUPPORT
extern gotoblas_t gotoblas_POWER10; extern gotoblas_t gotoblas_POWER10;
#endif #endif

View File

@ -1767,11 +1767,11 @@ int get_num_procs(void);
int get_num_procs(void) { int get_num_procs(void) {
static int nums = 0; static int nums = 0;
#if defined(__GLIBC_PREREQ)
cpu_set_t cpuset,*cpusetp; cpu_set_t cpuset,*cpusetp;
size_t size; size_t size;
int ret; int ret;
#if defined(__GLIBC_PREREQ)
#if !__GLIBC_PREREQ(2, 7) #if !__GLIBC_PREREQ(2, 7)
int i; int i;
#if !__GLIBC_PREREQ(2, 6) #if !__GLIBC_PREREQ(2, 6)

View File

@ -120,10 +120,10 @@ dll : ../$(LIBDLLNAME)
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB) -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)
$(LIBPREFIX).def : gensymbol $(LIBPREFIX).def : gensymbol
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
libgoto_hpl.def : gensymbol libgoto_hpl.def : gensymbol
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
ifeq ($(OSNAME), Darwin) ifeq ($(OSNAME), Darwin)
INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib
@ -258,16 +258,16 @@ static : ../$(LIBNAME)
rm -f goto.$(SUFFIX) rm -f goto.$(SUFFIX)
osx.def : gensymbol ../Makefile.system ../getarch.c osx.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
aix.def : gensymbol ../Makefile.system ../getarch.c aix.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
objcopy.def : gensymbol ../Makefile.system ../getarch.c objcopy.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
objconv.def : gensymbol ../Makefile.system ../getarch.c objconv.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
test : linktest.c test : linktest.c
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.

View File

@ -50,8 +50,8 @@
zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2, zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2,
zgeadd, dzsum); zgeadd, dzsum);
@cblasobjs = (lsame, xerbla); @blasobjs = (lsame, xerbla);
@halfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod); @bfblasobjs = (sbgemm, sbgemv, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
@cblasobjsc = ( @cblasobjsc = (
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv, cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k, cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k,
@ -72,7 +72,7 @@
); );
@cblasobjss = ( @cblasobjss = (
cblas_sasum, cblas_saxpy, cblas_sasum, cblas_saxpy, cblas_saxpby,
cblas_scopy, cblas_sdot, cblas_sdsdot, cblas_sgbmv, cblas_sgemm, cblas_scopy, cblas_sdot, cblas_sdsdot, cblas_sgbmv, cblas_sgemm,
cblas_sgemv, cblas_sger, cblas_snrm2, cblas_srot, cblas_srotg, cblas_sgemv, cblas_sger, cblas_snrm2, cblas_srot, cblas_srotg,
cblas_srotm, cblas_srotmg, cblas_ssbmv, cblas_sscal, cblas_sspmv, cblas_sspr2, cblas_sspr, cblas_srotm, cblas_srotmg, cblas_ssbmv, cblas_sscal, cblas_sspmv, cblas_sspr2, cblas_sspr,
@ -94,7 +94,7 @@
@cblasobjs = ( cblas_xerbla ); @cblasobjs = ( cblas_xerbla );
@halfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod); @bfcblasobjs = (cblas_sbgemm, cblas_sbgemv, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod);
@exblasobjs = ( @exblasobjs = (
qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm, qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm,
@ -415,7 +415,7 @@ zpotri,
cgeqrt, cgeqrt2, cgeqrt3, cgemqrt, cgeqrt, cgeqrt2, cgeqrt3, cgemqrt,
ctpqrt, ctpqrt2, ctpmqrt, ctprfb, ctpqrt, ctpqrt2, ctpmqrt, ctprfb,
); );
@lapack2objszc = ( @lapackobjs2zc = (
# ZCLASRC -- Double-single mixed precision complex routines called from # ZCLASRC -- Double-single mixed precision complex routines called from
# single, single-extra and double precision complex LAPACK # single, single-extra and double precision complex LAPACK
# routines (i.e. from CLASRC, CXLASRC, ZLASRC). # routines (i.e. from CLASRC, CXLASRC, ZLASRC).
@ -425,7 +425,7 @@ zpotri,
cpotrs, cpotrs,
); );
@lapack2objsd = ( @lapackobjs2d = (
# DLASRC -- Double precision real LAPACK routines # DLASRC -- Double precision real LAPACK routines
# already provided by @lapackobjs: # already provided by @lapackobjs:
# dgesv, dgetf2, dgetrs, dlaswp, dlauu2, dlauum, dpotf2, dpotrf, dpotri, # dgesv, dgetf2, dgetrs, dlaswp, dlauu2, dlauum, dpotf2, dpotrf, dpotri,
@ -568,7 +568,7 @@ zpotri,
); );
# functions added for lapack-3.6.0 # functions added for lapack-3.6.0
@lapack2objsc = ( @lapack2objsc, @lapackobjs2c = ( @lapackobjs2c,
cgejsv, cgejsv,
cgesvdx, cgesvdx,
cgesvj, cgesvj,
@ -604,7 +604,7 @@ zpotri,
csyr2, csyr2,
cunm22, cunm22,
); );
@lapackobjs2d = (@lapack2objsd, @lapackobjs2d = (@lapackobjs2d,
dbdsvdx, dbdsvdx,
dgesvdx, dgesvdx,
dgetrf2, dgetrf2,
@ -637,7 +637,7 @@ zpotri,
dpotrf2, dpotrf2,
dsecnd, dsecnd,
); );
@lapack2objss = (@lapack2objss, @lapackobjs2s = (@lapackobjs2s,
sbdsvdx, sbdsvdx,
second, second,
sgesvdx, sgesvdx,
@ -670,7 +670,7 @@ zpotri,
sorm22, sorm22,
spotrf2, spotrf2,
); );
@lapack2objsz = (@lapack2objsz, @lapackobjs2z = (@lapackobjs2z,
zgejsv, zgejsv,
zgesvdx, zgesvdx,
zgesvj, zgesvj,
@ -707,7 +707,7 @@ zpotri,
zunm22, zunm22,
); );
# functions added for lapack-3.7.0 # functions added for lapack-3.7.0
@lapack2objss = (@lapack2objss, @lapackobjs2s = (@lapackobjs2s,
slarfy, slarfy,
strevc3, strevc3,
sgelqt, sgelqt,
@ -726,7 +726,7 @@ zpotri,
stplqt2, stplqt2,
stpmlqt, stpmlqt,
); );
@lapack2objsd = (@lapack2objsd, @lapackobjs2d = (@lapackobjs2d,
dlarfy, dlarfy,
dsyconvf, dsyconvf,
dtrevc3, dtrevc3,
@ -746,7 +746,7 @@ zpotri,
dtplqt2, dtplqt2,
dtpmlqt, dtpmlqt,
); );
@lapack2objsc = (@lapack2objsc, @lapackobjs2c = (@lapackobjs2c,
clarfy, clarfy,
csyconvf, csyconvf,
ctrevc3, ctrevc3,
@ -766,7 +766,7 @@ zpotri,
ctplqt2, ctplqt2,
ctpmlqt, ctpmlqt,
); );
@lapack2objsz = (@lapack2objsz, @lapackobjs2z = (@lapackobjs2z,
zlarfy, zlarfy,
zsyconvf, zsyconvf,
ztrevc3, ztrevc3,
@ -786,31 +786,31 @@ zpotri,
zlamswlq, zlamswlq,
zgemlq, zgemlq,
); );
@lapack2objs = (@lapack2objs, @lapackobjs2s = (@lapackobjs2s,
sladiv1, sladiv1);
dladiv1, @lapackobjs2d = (@lapackobjs2d,
dladiv1);
@lapackobjs = (@lapackobjs,
iparam2stage, iparam2stage,
# functions added for lapack-3.8.0 # functions added for lapack-3.8.0
ilaenv2stage, ilaenv2stage,
); );
# functions added for lapack-3.9.0 # functions added for lapack-3.9.0
@lapack2objsc = (@lapack2objsc, @lapackobjs2c = (@lapackobjs2c,
cgesvdq, cgesvdq,
cungtsqr, cungtsqr
dcombssq,
); );
@lapack2objsd = (@lapack2objsd, @lapackobjs2d = (@lapackobjs2d,
dcombssq,
dgesvdq, dgesvdq,
dorgtsqr, dorgtsqr,
); );
@lapack2objss = (@lapack2objss, @lapackobjs2s = (@lapackobjs2s,
scombssq, scombssq,
sgesvdq, sgesvdq,
sorgtsqr, sorgtsqr,
); );
@lapack2objsz = (@lapack2objsz, @lapackobjs2z = (@lapackobjs2z,
zgesvdq, zgesvdq,
zungtsqr zungtsqr
); );
@ -835,10 +835,29 @@ zpotri,
dlatzm, dtzrqf); dlatzm, dtzrqf);
@lapack_deprecated_objss = ( @lapack_deprecated_objss = (
sgelsx,
sgegs, sgegs,
sgegv, sgegv,
sgeqpf,
sggsvd,
sggsvp,
slahrd,
slatzm,
stzrqf
); );
@lapack_deprecated_objsz = (
zgegs,
zgegv,
zgelsx,
zgeqpf,
zggsvd,
zggsvp,
zlahrd,
zlatzm,
ztzrqf
);
@lapacke_deprecated_objsc = ( @lapacke_deprecated_objsc = (
LAPACKE_cggsvp, LAPACKE_cggsvp,
LAPACKE_cggsvp_work, LAPACKE_cggsvp_work,
@ -3590,14 +3609,18 @@ use File::Basename;
my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib"); my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib");
if ($ARGV[12] == 1) { if ($ARGV[12] == 1) {
@blasobjs = (@blasobjs, @halfblasobjs); @blasobjs = (@blasobjs, @bfblasobjs);
@cblasobjs = (@cblasobjs, @halfcblasobjs); @cblasobjs = (@cblasobjs, @bfcblasobjs);
} }
if ($ARGV[13] == 1) { if ($ARGV[13] == 1) {
@blasobjs = (@blasobjs, @blasobjss); @blasobjs = (@blasobjs, @blasobjss);
@cblasobjs = (@cblasobjs, @cblasobjss); @cblasobjs = (@cblasobjs, @cblasobjss);
@lapackobjs = (@lapackobjs, @lapackobjss); @lapackobjs = (@lapackobjs, @lapackobjss);
@lapack2objs = (@lapack2objs, @lapack2objss); @lapackobjs2 = (@lapackobjs2, @lapackobjs2s);
@lapackobjs2 = (@lapackobjs2, @lapackobjs2sc);
@lapackobjs2 = (@lapackobjs2, @lapackobjs2ds);
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objss);
@lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objss);
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s); @lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s);
@lapackeobjs = (@lapackeobjs, @lapackeobjss); @lapackeobjs = (@lapackeobjs, @lapackeobjss);
} }
@ -3605,7 +3628,12 @@ if ($ARGV[14] == 1) {
@blasobjs = (@blasobjs, @blasobjsd); @blasobjs = (@blasobjs, @blasobjsd);
@cblasobjs = (@cblasobjs, @cblasobjsd); @cblasobjs = (@cblasobjs, @cblasobjsd);
@lapackobjs = (@lapackobjs, @lapackobjsd); @lapackobjs = (@lapackobjs, @lapackobjsd);
@lapack2objs = (@lapack2objs, @lapack2objsd); if ($ARGV[13] == 0) {
@lapackobjs2 = (@lapackobjs2, @lapackobjs2ds);
}
@lapackobjs2 = (@lapackobjs2, @lapackobjs2d, @lapackobjs2dz);
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsd);
@lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objsd);
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d); @lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d);
@lapackeobjs = (@lapackeobjs, @lapackeobjsd); @lapackeobjs = (@lapackeobjs, @lapackeobjsd);
} }
@ -3613,9 +3641,14 @@ if ($ARGV[15] == 1) {
@blasobjs = (@blasobjs, @blasobjsc); @blasobjs = (@blasobjs, @blasobjsc);
@cblasobjs = (@cblasobjs, @cblasobjsc); @cblasobjs = (@cblasobjs, @cblasobjsc);
@gemm3mobjs = (@gemm3mobjs, @gemm3mobjsc); @gemm3mobjs = (@gemm3mobjs, @gemm3mobjsc);
@cblasgemm3mobjs = (@cblasgemm3mobjs, @sblasgemm3mobjsc); @cblasgemm3mobjs = (@cblasgemm3mobjs, @cblasgemm3mobjsc);
@lapackobjs = (@lapackobjs, @lapackobjsc); @lapackobjs = (@lapackobjs, @lapackobjsc);
@lapack2objs = (@lapack2objs, @lapack2objsc, @lapac2objszc); @lapackobjs2 = (@lapackobjs2, @lapackobjs2c, @lapackobjs2zc);
if ($ARGV[13] == 0) {
@lapackobjs2 = (@lapackobjs2, @lapackobjs2sc);
}
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsc);
@lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objsc);
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c); @lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c);
@lapackeobjs = (@lapackeobjs, @lapackeobjsc); @lapackeobjs = (@lapackeobjs, @lapackeobjsc);
} }
@ -3623,9 +3656,17 @@ if ($ARGV[16] == 1) {
@blasobjs = (@blasobjs, @blasobjsz); @blasobjs = (@blasobjs, @blasobjsz);
@cblasobjs = (@cblasobjs, @cblasobjsz); @cblasobjs = (@cblasobjs, @cblasobjsz);
@gemm3mobjs = (@gemm3mobjs, @gemm3mobjsz); @gemm3mobjs = (@gemm3mobjs, @gemm3mobjsz);
@cblasgemm3mobjs = (@cblasgemm3mobjs, @sblasgemm3mobjsz); @cblasgemm3mobjs = (@cblasgemm3mobjs, @cblasgemm3mobjsz);
@lapackobjs = (@lapackobjs, @lapackobjsz); @lapackobjs = (@lapackobjs, @lapackobjsz);
@lapack2objs = (@lapack2objs, @lapack2objsz, @lapack2objszc); @lapackobjs2 = (@lapackobjs2, @lapackobjs2z);
if ($ARGV[15] == 0) {
@lapackobjs2 = (@lapackobjs2, @lapackobjs2zc);
}
if ($ARGV[14] == 0) {
@lapackobjs2 = (@lapackobjs2, @lapackobjs2dz);
}
@lapack_deprecated_objs = (@lapack_deprecated_objs, @lapack_deprecated_objsz);
@lapacke_deprecated_objs = (@lapacke_deprecated_objs, @lapacke_deprecated_objsz);
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z); @lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z);
@lapackeobjs = (@lapackeobjs, @lapackeobjsz); @lapackeobjs = (@lapackeobjs, @lapackeobjsz);
} }

View File

@ -33,7 +33,7 @@ if ($compiler eq "") {
"ppuf77", "ppuf95", "ppuf90", "ppuxlf", "ppuf77", "ppuf95", "ppuf90", "ppuxlf",
"pathf90", "pathf95", "pathf90", "pathf95",
"pgf95", "pgf90", "pgf77", "pgf95", "pgf90", "pgf77",
"flang", "flang", "egfortran",
"ifort"); "ifort");
OUTER: OUTER:

View File

@ -330,7 +330,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
"-DFMA3" "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
#define LIBNAME "haswell" #define LIBNAME "haswell"
#define CORENAME "HASWELL" #define CORENAME "HASWELL"
#endif #endif
@ -346,7 +346,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
"-DFMA3" "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
#define LIBNAME "haswell" #define LIBNAME "haswell"
#define CORENAME "HASWELL" #define CORENAME "HASWELL"
#else #else
@ -359,7 +359,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
"-DFMA3 -DHAVE_AVX512VL -march=skylake-avx512" "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3 -DHAVE_AVX512VL -march=skylake-avx512"
#define LIBNAME "skylakex" #define LIBNAME "skylakex"
#define CORENAME "SKYLAKEX" #define CORENAME "SKYLAKEX"
#endif #endif
@ -376,7 +376,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
"-DFMA3" "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
#define LIBNAME "haswell" #define LIBNAME "haswell"
#define CORENAME "HASWELL" #define CORENAME "HASWELL"
#else #else
@ -389,7 +389,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \ "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
"-DFMA3 -DHAVE_AVX512VL -DHAVE_AVX512BF16 -march=cooperlake" "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3 -DHAVE_AVX512VL -DHAVE_AVX512BF16 -march=cooperlake"
#define LIBNAME "cooperlake" #define LIBNAME "cooperlake"
#define CORENAME "COOPERLAKE" #define CORENAME "COOPERLAKE"
#endif #endif
@ -559,7 +559,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \ "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \ "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
"-DHAVE_AVX -DHAVE_FMA3 -DFMA3" "-DHAVE_AVX -DHAVE_AVX2 -DHAVE_FMA3 -DFMA3"
#define LIBNAME "zen" #define LIBNAME "zen"
#define CORENAME "ZEN" #define CORENAME "ZEN"
#endif #endif
@ -1236,6 +1236,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else #else
#endif #endif
#ifdef FORCE_VORTEX
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "VORTEX"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DVORTEX " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "vortex"
#define CORENAME "VORTEX"
#endif
#ifdef FORCE_ZARCH_GENERIC #ifdef FORCE_ZARCH_GENERIC
#define FORCE #define FORCE
#define ARCHITECTURE "ZARCH" #define ARCHITECTURE "ZARCH"
@ -1409,8 +1423,41 @@ int main(int argc, char *argv[]){
printf("NUM_CORES=%d\n", get_num_cores()); printf("NUM_CORES=%d\n", get_num_cores());
#if defined(__arm__) && !defined(FORCE) #if defined(__arm__)
#if !defined(FORCE)
fprintf(stderr,"get features!\n");
get_features(); get_features();
#else
fprintf(stderr,"split archconfig!\n");
sprintf(buffer, "%s", ARCHCONFIG);
p = &buffer[0];
while (*p) {
if ((*p == '-') && (*(p + 1) == 'D')) {
p += 2;
if (*p != 'H') {
while( (*p != ' ') && (*p != '-') && (*p != '\0') && (*p != '\n')) {p++; }
if (*p == '-') continue;
}
while ((*p != ' ') && (*p != '\0')) {
if (*p == '=') {
printf("=");
p ++;
while ((*p != ' ') && (*p != '\0')) {
printf("%c", *p);
p ++;
}
} else {
printf("%c", *p);
p ++;
if ((*p == ' ') || (*p =='\0')) printf("=1\n");
}
}
} else p ++;
}
#endif
#endif #endif

View File

@ -48,6 +48,7 @@ SBLAS3OBJS = \
ifeq ($(BUILD_BFLOAT16),1) ifeq ($(BUILD_BFLOAT16),1)
SBBLAS1OBJS = sbdot.$(SUFFIX) SBBLAS1OBJS = sbdot.$(SUFFIX)
SBBLAS2OBJS = sbgemv.$(SUFFIX)
SBBLAS3OBJS = sbgemm.$(SUFFIX) SBBLAS3OBJS = sbgemm.$(SUFFIX)
SBEXTOBJS = sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX) SBEXTOBJS = sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX)
endif endif
@ -284,6 +285,7 @@ CSBLAS3OBJS = \
ifeq ($(BUILD_BFLOAT16),1) ifeq ($(BUILD_BFLOAT16),1)
CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX) CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX)
CSBBLAS2OBJS = cblas_sbgemv.$(SUFFIX)
CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX)
CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX) CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX)
endif endif
@ -382,6 +384,7 @@ SBLAS1OBJS += $(CSBLAS1OBJS)
SBLAS2OBJS += $(CSBLAS2OBJS) SBLAS2OBJS += $(CSBLAS2OBJS)
SBLAS3OBJS += $(CSBLAS3OBJS) SBLAS3OBJS += $(CSBLAS3OBJS)
SBBLAS1OBJS += $(CSBBLAS1OBJS) SBBLAS1OBJS += $(CSBBLAS1OBJS)
SBBLAS2OBJS += $(CSBBLAS2OBJS)
SBBLAS3OBJS += $(CSBBLAS3OBJS) SBBLAS3OBJS += $(CSBBLAS3OBJS)
DBLAS1OBJS += $(CDBLAS1OBJS) DBLAS1OBJS += $(CDBLAS1OBJS)
DBLAS2OBJS += $(CDBLAS2OBJS) DBLAS2OBJS += $(CDBLAS2OBJS)
@ -399,7 +402,7 @@ CBAUXOBJS += $(CXERBLAOBJ)
endif endif
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
SBBLASOBJS = $(SBBLAS1OBJS) $(SBBLAS3OBJS) SBBLASOBJS = $(SBBLAS1OBJS) $(SBBLAS2OBJS) $(SBBLAS3OBJS)
DBLASOBJS = $(DBLAS1OBJS) $(DBLAS2OBJS) $(DBLAS3OBJS) DBLASOBJS = $(DBLAS1OBJS) $(DBLAS2OBJS) $(DBLAS3OBJS)
QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS) QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS)
CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
@ -507,7 +510,7 @@ ifneq ($(BUILD_COMPLEX16),1)
endif endif
FUNCOBJS = $(SBEXTOBJS) $(CXERBLAOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) FUNCOBJS = $(SBEXTOBJS) $(CXERBLAOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
$(info FUNCOBJS = {[$(FUNCOBJS)]} )
ifdef EXPRECISION ifdef EXPRECISION
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
endif endif
@ -538,7 +541,7 @@ clean ::
level1 : $(SBEXTOBJS) $(SBBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) level1 : $(SBEXTOBJS) $(SBBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) level2 : $(SBBLAS2OBJS) $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
level3 : $(SBBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) level3 : $(SBBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS)
@ -929,6 +932,11 @@ xgeru.$(SUFFIX) xgeru.$(PSUFFIX) : zger.c
xgerc.$(SUFFIX) xgerc.$(PSUFFIX) : zger.c xgerc.$(SUFFIX) xgerc.$(PSUFFIX) : zger.c
$(CC) -c $(CFLAGS) -DCONJ $< -o $(@F) $(CC) -c $(CFLAGS) -DCONJ $< -o $(@F)
ifeq ($(BUILD_BFLOAT16),1)
sbgemv.$(SUFFIX) sbgemv.$(PSUFFIX) : sbgemv.c
$(CC) $(CFLAGS) -c $< -o $(@F)
endif
ifndef USE_NETLIB_GEMV ifndef USE_NETLIB_GEMV
sgemv.$(SUFFIX) sgemv.$(PSUFFIX): gemv.c sgemv.$(SUFFIX) sgemv.$(PSUFFIX): gemv.c
$(CC) -c $(CFLAGS) -o $(@F) $< $(CC) -c $(CFLAGS) -o $(@F) $<
@ -1656,6 +1664,11 @@ cblas_csscal.$(SUFFIX) cblas_csscal.$(PSUFFIX) : zscal.c
cblas_zdscal.$(SUFFIX) cblas_zdscal.$(PSUFFIX) : zscal.c cblas_zdscal.$(SUFFIX) cblas_zdscal.$(PSUFFIX) : zscal.c
$(CC) $(CFLAGS) -DCBLAS -c -DSSCAL $< -o $(@F) $(CC) $(CFLAGS) -DCBLAS -c -DSSCAL $< -o $(@F)
ifeq ($(BUILD_BFLOAT16),1)
cblas_sbgemv.$(SUFFIX) cblas_sbgemv.$(PSUFFIX) : sbgemv.c
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
endif
cblas_sgemv.$(SUFFIX) cblas_sgemv.$(PSUFFIX): gemv.c cblas_sgemv.$(SUFFIX) cblas_sgemv.$(PSUFFIX): gemv.c
$(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $< $(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $<

View File

@ -191,7 +191,6 @@ void CNAME(enum CBLAS_ORDER order,
} }
#endif #endif
//printf("m=%d, n=%d, trans=%d, incx=%d, incy=%d, alpha=%f, beta=%f\n", m, n, trans, incx, incy, alpha, beta);
if ((m==0) || (n==0)) return; if ((m==0) || (n==0)) return;
lenx = n; lenx = n;

210
interface/sbgemv.c Normal file
View File

@ -0,0 +1,210 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#include <stdio.h>
#include "common.h"
#include "l1param.h"
#ifdef FUNCTION_PROFILE
#include "functable.h"
#endif
#define ERROR_NAME "SBGEMV "
#ifdef SMP
static int (*sbgemv_thread[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG, int) = {
sbgemv_thread_n, sbgemv_thread_t,
};
#endif
#ifndef CBLAS
void NAME(char *TRANS, blasint *M, blasint *N, float *ALPHA, bfloat16 *a, blasint *LDA, bfloat16 *x, blasint *INCX, float *BETA, float *y, blasint *INCY)
{
char trans = *TRANS;
blasint m = *M;
blasint n = *N;
blasint lda = *LDA;
blasint incx = *INCX;
blasint incy = *INCY;
float alpha = *ALPHA;
float beta = *BETA;
#ifdef SMP
int nthreads;
#endif
int (*sbgemv[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG) = {
SBGEMV_N, SBGEMV_T,
};
blasint info;
blasint lenx, leny;
blasint i;
PRINT_DEBUG_NAME;
TOUPPER(trans);
info = 0;
i = -1;
if (trans == 'N') {i = 0;}
if (trans == 'T') {i = 1;}
if (trans == 'R') {i = 0;}
if (trans == 'C') {i = 1;}
if (incy == 0) {info = 11;}
if (incx == 0) {info = 8;}
if (lda < MAX(1, m)) {info = 6;}
if (n < 0) {info = 3;}
if (m < 0) {info = 2;}
if (i < 0) {info = 1;}
trans = i;
if (info != 0) {
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
return;
}
#else
void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasint n, float alpha, bfloat16 *a, blasint lda, bfloat16 *x, blasint incx, float beta, float *y, blasint incy)
{
blasint lenx, leny;
int trans;
blasint info, t;
#ifdef SMP
int nthreads;
#endif
int (*sbgemv[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG) = {
SBGEMV_N, SBGEMV_T,
};
PRINT_DEBUG_CNAME;
trans = -1;
info = 0;
if (order == CblasColMajor) { // Column Major
if (TransA == CblasNoTrans || TransA == CblasConjNoTrans) {
trans = 0;
} else if (TransA == CblasTrans || TransA == CblasConjTrans) {
trans = 1;
}
} else { // Row Major
if (TransA == CblasNoTrans || TransA == CblasConjNoTrans) {
trans = 1;
} else if (TransA == CblasTrans || TransA == CblasConjTrans) {
trans = 0;
}
t = n;
n = m;
m = t;
}
info = -1;
if (incy == 0) {info = 11;}
if (incx == 0) {info = 8;}
if (lda < MAX(1, m)) {info = 6;}
if (n < 0) {info = 3;}
if (m < 0) {info = 2;}
if (trans < 0) {info = 1;}
if (info >= 0) {
BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
return;
}
#endif
if ((m==0) || (n==0)) return;
if (trans) {
lenx = m;
leny = n;
} else {
lenx = n;
leny = m;
}
if (alpha == ZERO) {
if (beta != ONE) SCAL_K(leny, 0, 0, beta, y, blasabs(incy), NULL, 0, NULL, 0);
return;
}
IDEBUG_START;
FUNCTION_PROFILE_START();
if (incx < 0) {x -= (lenx - 1) * incx;}
if (incy < 0) {y -= (leny - 1) * incy;}
#ifdef SMP
int thread_thres_row = 20480;
if (trans) {
if (n <= thread_thres_row) {
nthreads = 1;
} else {
nthreads = num_cpu_avail(1);
}
} else {
if (m <= thread_thres_row) {
nthreads = 1;
} else {
nthreads = num_cpu_avail(1);
}
}
if (nthreads == 1) {
#endif
(sbgemv[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy);
#ifdef SMP
} else {
(sbgemv_thread[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy, nthreads);
}
#endif
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
IDEBUG_END;
return;
}

View File

@ -5,37 +5,24 @@ endif
TOPDIR = .. TOPDIR = ..
include $(TOPDIR)/Makefile.system include $(TOPDIR)/Makefile.system
ifdef HAVE_SSE3
CFLAGS += -msse3
endif
ifdef HAVE_SSSE3
CFLAGS += -mssse3
endif
ifeq ($(C_COMPILER), GCC)
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
endif
ifeq ($(ARCH), power) ifeq ($(ARCH), power)
ifeq ($(C_COMPILER), CLANG) ifeq ($(C_COMPILER), CLANG)
override CFLAGS += -fno-integrated-as override CFLAGS += -fno-integrated-as
endif endif
endif endif
AVX2OPT = AVX2OPT =
ifeq ($(C_COMPILER), GCC) ifeq ($(C_COMPILER), GCC)
# AVX2 support was added in 4.7.0 # AVX2 support was added in 4.7.0
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4) GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7) ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
AVX2OPT = -mavx2 AVX2OPT = -mavx2
endif endif
endif endif
ifeq ($(C_COMPILER), CLANG) ifeq ($(C_COMPILER), CLANG)
# Any clang posing as gcc 4.2 should be new enough (3.4 or later) # Any clang posing as gcc 4.2 should be new enough (3.4 or later)
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4) GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2)
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 2) ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2), 11)
AVX2OPT = -mavx2 AVX2OPT = -mavx2
endif endif
endif endif
@ -44,12 +31,6 @@ ifdef NO_AVX2
endif endif
ifdef TARGET_CORE ifdef TARGET_CORE
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
override CFLAGS += -msse -msse2 -msse3 -mssse3 -msse4.1
endif
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),KATMAI COPPERMINE BANIAS NORTHWOOD ATHLON OPTERON))
override CFLAGS += -msse -msse2
endif
ifeq ($(TARGET_CORE), COOPERLAKE) ifeq ($(TARGET_CORE), COOPERLAKE)
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
ifeq ($(GCCVERSIONGTEQ10), 1) ifeq ($(GCCVERSIONGTEQ10), 1)

View File

@ -48,6 +48,16 @@ ifndef XGEMVTKERNEL
XGEMVTKERNEL = zgemv_t.S XGEMVTKERNEL = zgemv_t.S
endif endif
ifeq ($(BUILD_BFLOAT16),1)
ifndef SBGEMVNKERNEL
SBGEMVNKERNEL = ../x86_64/sbgemv_n.c
endif
ifndef SBGEMVTKERNEL
SBGEMVTKERNEL = ../x86_64/sbgemv_t.c
endif
endif
### GER ### ### GER ###
ifndef SGERKERNEL ifndef SGERKERNEL
@ -234,6 +244,12 @@ XBLASOBJS += \
xhemv_U$(TSUFFIX).$(SUFFIX) xhemv_L$(TSUFFIX).$(SUFFIX) xhemv_V$(TSUFFIX).$(SUFFIX) xhemv_M$(TSUFFIX).$(SUFFIX) \ xhemv_U$(TSUFFIX).$(SUFFIX) xhemv_L$(TSUFFIX).$(SUFFIX) xhemv_V$(TSUFFIX).$(SUFFIX) xhemv_M$(TSUFFIX).$(SUFFIX) \
xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX) xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX)
ifeq ($(BUILD_BFLOAT16),1)
SBBLASOBJS += \
sbgemv_n$(TSUFFIX).$(SUFFIX) \
sbgemv_t$(TSUFFIX).$(SUFFIX)
endif
ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" "" ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" ""
$(KDIR)sgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(KDIR)sgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -UTRANS $< -o $@ $(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -UTRANS $< -o $@
@ -483,4 +499,10 @@ $(KDIR)xhemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_V$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)xhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_M_KERNEL) ../symcopy.h $(KDIR)xhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_M_KERNEL) ../symcopy.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@
ifeq ($(BUILD_BFLOAT16),1)
$(KDIR)sbgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sbgemv_n$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMVNKERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@
$(KDIR)sbgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)sbgemv_t$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMVTKERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@
endif

View File

@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
i++ ; i++ ;
} }
#if !defined(__POWER__) #if !defined(__PPC__)
CREAL(result) = dot[0]; CREAL(result) = dot[0];
CIMAG(result) = dot[1]; CIMAG(result) = dot[1];
#else #else

View File

@ -34,12 +34,12 @@ SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = dgemm_kernel_power10.c DGEMMKERNEL = dgemm_kernel_power10.c
DGEMMINCOPY = ../generic/gemm_ncopy_16.c DGEMMINCOPY =
DGEMMITCOPY = dgemm_tcopy_16_power8.S DGEMMITCOPY =
DGEMMONCOPY = dgemm_ncopy_4_power8.S DGEMMONCOPY = dgemm_ncopy_8_power10.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
@ -69,7 +69,7 @@ STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
@ -141,23 +141,19 @@ DASUMKERNEL = dasum.c
CASUMKERNEL = casum.c CASUMKERNEL = casum.c
ZASUMKERNEL = zasum.c ZASUMKERNEL = zasum.c
# #
SAXPYKERNEL = saxpy.c SAXPYKERNEL = saxpy_power10.c
DAXPYKERNEL = daxpy_power10.c DAXPYKERNEL = daxpy_power10.c
ifneq ($(GCCVERSIONGTEQ9),1) CAXPYKERNEL = caxpy_power10.c
CAXPYKERNEL = caxpy_power9.S
else
CAXPYKERNEL = caxpy.c
endif
ZAXPYKERNEL = zaxpy_power10.c ZAXPYKERNEL = zaxpy_power10.c
# #
SCOPYKERNEL = scopy.c SCOPYKERNEL = scopy_power10.c
DCOPYKERNEL = dcopy_power10.c DCOPYKERNEL = dcopy_power10.c
CCOPYKERNEL = ccopy.c CCOPYKERNEL = ccopy_power10.c
ZCOPYKERNEL = zcopy_power10.c ZCOPYKERNEL = zcopy_power10.c
# #
SDOTKERNEL = sdot.c SDOTKERNEL = sdot_power10.c
DDOTKERNEL = ddot.c DDOTKERNEL = ddot_power10.c
DSDOTKERNEL = sdot.c DSDOTKERNEL = sdot_power10.c
ifneq ($(GCCVERSIONGTEQ9),1) ifneq ($(GCCVERSIONGTEQ9),1)
CDOTKERNEL = cdot_power9.S CDOTKERNEL = cdot_power9.S
else else

View File

@ -0,0 +1,188 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#define HAVE_KERNEL_8 1
static void caxpy_kernel_8 (long n, float *x, float *y,
float alpha_r, float alpha_i)
{
#if !defined(CONJ)
static const float mvec[4] = { -1.0, 1.0, -1.0, 1.0 };
#else
static const float mvec[4] = { 1.0, -1.0, 1.0, -1.0 };
#endif
const float *mvecp = mvec;
/* We have to load reverse mask for big endian. */
/* __vector unsigned char mask={ 4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}; */
__vector unsigned char mask = { 11,10,9,8,15,14,13,12,3,2,1,0,7,6,5,4};
long ytmp;
__asm__
(
"xscvdpspn 32, %7 \n\t"
"xscvdpspn 33, %8 \n\t"
"xxspltw 32, 32, 0 \n\t"
"xxspltw 33, 33, 0 \n\t"
"lxvd2x 36, 0, %9 \n\t" // mvec
#if !defined(CONJ)
"xvmulsp 33, 33, 36 \n\t" // alpha_i * mvec
#else
"xvmulsp 32, 32, 36 \n\t" // alpha_r * mvec
#endif
"mr %4, %3 \n\t"
"dcbt 0, %2 \n\t"
"dcbt 0, %3 \n\t"
"lxvp 40, 0(%2) \n\t" // x0
"lxvp 42, 32(%2) \n\t" // x2
"lxvp 48, 0(%3) \n\t" // y0
"lxvp 50, 32(%3) \n\t" // y2
"xxperm 52, 40, %x10 \n\t" // exchange real and imag part
"xxperm 53, 41, %x10 \n\t" // exchange real and imag part
"xxperm 54, 42, %x10 \n\t" // exchange real and imag part
"xxperm 55, 43, %x10 \n\t" // exchange real and imag part
"lxvp 44, 64(%2) \n\t" // x4
"lxvp 46, 96(%2) \n\t" // x6
"lxvp 34, 64(%3) \n\t" // y4
"lxvp 38, 96(%3) \n\t" // y6
"xxperm 56, 44, %x10 \n\t" // exchange real and imag part
"xxperm 57, 45, %x10 \n\t" // exchange real and imag part
"xxperm 58, 46, %x10 \n\t" // exchange real and imag part
"xxperm 59, 47, %x10 \n\t" // exchange real and imag part
"addi %2, %2, 128 \n\t"
"addi %3, %3, 128 \n\t"
"addic. %1, %1, -16 \n\t"
"ble two%= \n\t"
".align 5 \n"
"one%=: \n\t"
"xvmaddasp 48, 40, 32 \n\t" // alpha_r * x0_r , alpha_r * x0_i
"xvmaddasp 49, 41, 32 \n\t"
"lxvp 40, 0(%2) \n\t" // x0
"xvmaddasp 50, 42, 32 \n\t"
"xvmaddasp 51, 43, 32 \n\t"
"lxvp 42, 32(%2) \n\t" // x2
"xvmaddasp 34, 44, 32 \n\t"
"xvmaddasp 35, 45, 32 \n\t"
"lxvp 44, 64(%2) \n\t" // x4
"xvmaddasp 38, 46, 32 \n\t"
"xvmaddasp 39, 47, 32 \n\t"
"lxvp 46, 96(%2) \n\t" // x6
"xvmaddasp 48, 52, 33 \n\t" // alpha_i * x0_i , alpha_i * x0_r
"addi %2, %2, 128 \n\t"
"xvmaddasp 49, 53, 33 \n\t"
"xvmaddasp 50, 54, 33 \n\t"
"xvmaddasp 51, 55, 33 \n\t"
"xvmaddasp 34, 56, 33 \n\t"
"xvmaddasp 35, 57, 33 \n\t"
"xvmaddasp 38, 58, 33 \n\t"
"xvmaddasp 39, 59, 33 \n\t"
"stxvp 48, 0(%4) \n\t"
"stxvp 50, 32(%4) \n\t"
"stxvp 34, 64(%4) \n\t"
"stxvp 38, 96(%4) \n\t"
"addi %4, %4, 128 \n\t"
"xxperm 52, 40, %x10 \n\t" // exchange real and imag part
"xxperm 53, 41, %x10 \n\t" // exchange real and imag part
"lxvp 48, 0(%3) \n\t" // y0
"xxperm 54, 42, %x10 \n\t" // exchange real and imag part
"xxperm 55, 43, %x10 \n\t" // exchange real and imag part
"lxvp 50, 32(%3) \n\t" // y2
"xxperm 56, 44, %x10 \n\t" // exchange real and imag part
"xxperm 57, 45, %x10 \n\t" // exchange real and imag part
"lxvp 34, 64(%3) \n\t" // y4
"xxperm 58, 46, %x10 \n\t" // exchange real and imag part
"xxperm 59, 47, %x10 \n\t" // exchange real and imag part
"lxvp 38, 96(%3) \n\t" // y6
"addi %3, %3, 128 \n\t"
"addic. %1, %1, -16 \n\t"
"bgt one%= \n"
"two%=: \n\t"
"xvmaddasp 48, 40, 32 \n\t" // alpha_r * x0_r , alpha_r * x0_i
"xvmaddasp 49, 41, 32 \n\t"
"xvmaddasp 50, 42, 32 \n\t"
"xvmaddasp 51, 43, 32 \n\t"
"xvmaddasp 34, 44, 32 \n\t"
"xvmaddasp 35, 45, 32 \n\t"
"xvmaddasp 38, 46, 32 \n\t"
"xvmaddasp 39, 47, 32 \n\t"
"xvmaddasp 48, 52, 33 \n\t" // alpha_i * x0_i , alpha_i * x0_r
"xvmaddasp 49, 53, 33 \n\t"
"xvmaddasp 50, 54, 33 \n\t"
"xvmaddasp 51, 55, 33 \n\t"
"xvmaddasp 34, 56, 33 \n\t"
"xvmaddasp 35, 57, 33 \n\t"
"xvmaddasp 38, 58, 33 \n\t"
"xvmaddasp 39, 59, 33 \n\t"
"stxvp 48, 0(%4) \n\t"
"stxvp 50, 32(%4) \n\t"
"stxvp 34, 64(%4) \n\t"
"stxvp 38, 96(%4) \n\t"
"#n=%1 x=%5=%2 y=%0=%3 alpha=(%7,%8) mvecp=%6=%9 ytmp=%4\n"
:
"+m" (*y),
"+r" (n), // 1
"+b" (x), // 2
"+b" (y), // 3
"=b" (ytmp) // 4
:
"m" (*x),
"m" (*mvecp),
"d" (alpha_r), // 7
"d" (alpha_i), // 8
"4" (mvecp), // 9
"wa" (mask)
:
"cr0",
"vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39",
"vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47",
"vs48","vs49","vs50","vs51","vs52","vs53","vs54","vs55",
"vs56","vs57","vs58","vs59"
);
}

View File

@ -0,0 +1,126 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "common.h"
#if defined(__VEC__) || defined(__ALTIVEC__)
#include "caxpy_microk_power10.c"
#endif
#ifndef HAVE_KERNEL_8
static void caxpy_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT da_r,FLOAT da_i)
{
BLASLONG register i = 0;
BLASLONG register ix = 0;
while(i < n)
{
#if !defined(CONJ)
y[ix] += ( da_r * x[ix] - da_i * x[ix+1] ) ;
y[ix+1] += ( da_r * x[ix+1] + da_i * x[ix] ) ;
y[ix+2] += ( da_r * x[ix+2] - da_i * x[ix+3] ) ;
y[ix+3] += ( da_r * x[ix+3] + da_i * x[ix+2] ) ;
#else
y[ix] += ( da_r * x[ix] + da_i * x[ix+1] ) ;
y[ix+1] -= ( da_r * x[ix+1] - da_i * x[ix] ) ;
y[ix+2] += ( da_r * x[ix+2] + da_i * x[ix+3] ) ;
y[ix+3] -= ( da_r * x[ix+3] - da_i * x[ix+2] ) ;
#endif
ix+=4 ;
i+=2 ;
}
}
#endif
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
if ( n <= 0 ) return(0);
if ( (inc_x == 1) && (inc_y == 1) )
{
BLASLONG n1 = n & -16;
if ( n1 )
{
caxpy_kernel_8 (n1, x, y, da_r, da_i);
ix = 2 * n1;
}
i = n1;
while(i < n)
{
#if !defined(CONJ)
y[ix] += ( da_r * x[ix] - da_i * x[ix+1] ) ;
y[ix+1] += ( da_r * x[ix+1] + da_i * x[ix] ) ;
#else
y[ix] += ( da_r * x[ix] + da_i * x[ix+1] ) ;
y[ix+1] -= ( da_r * x[ix+1] - da_i * x[ix] ) ;
#endif
i++ ;
ix += 2;
}
return(0);
}
inc_x *=2;
inc_y *=2;
while(i < n)
{
#if !defined(CONJ)
y[iy] += ( da_r * x[ix] - da_i * x[ix+1] ) ;
y[iy+1] += ( da_r * x[ix+1] + da_i * x[ix] ) ;
#else
y[iy] += ( da_r * x[ix] + da_i * x[ix+1] ) ;
y[iy+1] -= ( da_r * x[ix+1] - da_i * x[ix] ) ;
#endif
ix += inc_x ;
iy += inc_y ;
i++ ;
}
return(0);
}

View File

@ -0,0 +1,132 @@
/***************************************************************************
Copyright (c) 2013-2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "common.h"
#if defined(__VEC__) || defined(__ALTIVEC__)
#include "copy_microk_power10.c"
#endif
#ifndef HAVE_KERNEL
static void copy_kernel(BLASLONG n, FLOAT *x, FLOAT *y)
{
BLASLONG i=0;
FLOAT f0, f1, f2, f3, f4, f5, f6, f7;
FLOAT *x1=x;
FLOAT *y1=y;
while ( i<n )
{
f0 = x1[0];
f1 = x1[1];
f2 = x1[2];
f3 = x1[3];
f4 = x1[4];
f5 = x1[5];
f6 = x1[6];
f7 = x1[7];
y1[0] = f0;
y1[1] = f1;
y1[2] = f2;
y1[3] = f3;
y1[4] = f4;
y1[5] = f5;
y1[6] = f6;
y1[7] = f7;
x1 += 8;
y1 += 8;
i+=4;
}
return;
}
#endif
int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
if ( n <= 0 ) return(0);
if ( (inc_x == 1) && (inc_y == 1 ))
{
BLASLONG n1 = n & -64;
if ( n1 > 0 )
{
copy_kernel(n1, x, y);
i=n1;
ix=n1*2;
iy=n1*2;
}
while(i < n)
{
y[iy] = x[iy] ;
y[iy+1] = x[ix+1] ;
ix+=2;
iy+=2;
i++ ;
}
}
else
{
BLASLONG inc_x2 = 2 * inc_x;
BLASLONG inc_y2 = 2 * inc_y;
while(i < n)
{
y[iy] = x[ix] ;
y[iy+1] = x[ix+1] ;
ix += inc_x2 ;
iy += inc_y2 ;
i++ ;
}
}
return(0);
}

Some files were not shown because too many files have changed in this diff Show More