Compare commits
10 Commits
integer_da
...
v0.2.13
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
51ce5ef447 | ||
|
|
37aee1f9b1 | ||
|
|
7e4e195e82 | ||
|
|
a7126c2ce4 | ||
|
|
f20c0f9819 | ||
|
|
21b5347fbe | ||
|
|
f9991fd5f6 | ||
|
|
da3d70420a | ||
|
|
f773f492f3 | ||
|
|
3e068e78e2 |
@@ -121,11 +121,5 @@ In chronological order:
|
||||
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
|
||||
ARMv8 support.
|
||||
|
||||
* Dan Kortschak
|
||||
* [2015-01-07] Added test for drotmg bug #484.
|
||||
|
||||
* Ton van den Heuvel <https://github.com/ton>
|
||||
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().
|
||||
|
||||
* [Your name or handle] <[email or website]>
|
||||
* [Date] [Brief summary of your changes]
|
||||
|
||||
@@ -1,24 +1,4 @@
|
||||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.2.14
|
||||
24-Mar-2015
|
||||
common:
|
||||
* Improve OpenBLASConfig.cmake. (#474, #475. Thanks, xantares.)
|
||||
* Improve ger and gemv for small matrices by stack allocation.
|
||||
e.g. make -DMAX_STACK_ALLOC=2048 (#482. Thanks, Jerome Robert.)
|
||||
* Introduce openblas_get_num_threads and openblas_get_num_procs.
|
||||
(#497. Thanks, Erik Schnetter.)
|
||||
* Add ATLAS-style ?geadd function. (#509. Thanks, Martin Köhler.)
|
||||
* Fix c/zsyr bug with negative incx. (#492.)
|
||||
* Fix race condition during shutdown causing a crash in
|
||||
gotoblas_set_affinity(). (#508. Thanks, Ton van den Heuvel.)
|
||||
|
||||
x86/x86-64:
|
||||
* Support AMD Streamroller.
|
||||
|
||||
ARM:
|
||||
* Add Cortex-A9 and Cortex-A15 targets.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.13
|
||||
3-Dec-2014
|
||||
|
||||
@@ -9,10 +9,10 @@
|
||||
|
||||
If you want to allocate 64 large pages,
|
||||
|
||||
$shell> echo 0 > /proc/sys/vm/nr_hugepages # need to be reset
|
||||
$shell> echo 65 > /proc/sys/vm/nr_hugepages # add 1 extra page
|
||||
$shell> echo 3355443200 > /proc/sys/kernel/shmmax # just large number
|
||||
$shell> echo 3355443200 > /proc/sys/kernel/shmall
|
||||
$shell> echo 0 > /pros/sys/vm/nr_hugepages # need to be reset
|
||||
$shell> echo 65 > /pros/sys/vm/nr_hugepages # add 1 extra page
|
||||
$shell> echo 3355443200 > /pros/sys/kernel/shmmax # just large number
|
||||
$shell> echo 3355443200 > /pros/sys/kernel/shmall
|
||||
|
||||
Also may add a few lines into /etc/security/limits.conf file.
|
||||
|
||||
|
||||
6
Makefile
6
Makefile
@@ -20,8 +20,6 @@ ifneq ($(NO_LAPACK), 1)
|
||||
SUBDIRS += lapack
|
||||
endif
|
||||
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
|
||||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
|
||||
.PHONY : all libs netlib test ctest shared install
|
||||
@@ -133,7 +131,7 @@ ifeq ($(CORE), UNKOWN)
|
||||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
$(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.)
|
||||
$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
|
||||
endif
|
||||
ifeq ($(NO_STATIC), 1)
|
||||
ifeq ($(NO_SHARED), 1)
|
||||
@@ -233,7 +231,7 @@ ifndef NOFORTRAN
|
||||
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "NOOPT = $(LAPACK_FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
|
||||
15
Makefile.arm
15
Makefile.arm
@@ -1,23 +1,8 @@
|
||||
# ifeq logical or
|
||||
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
|
||||
ifeq ($(OSNAME), Android)
|
||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
else
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV7)
|
||||
ifeq ($(OSNAME), Android)
|
||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
else
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV6)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
|
||||
@@ -9,7 +9,7 @@ OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
|
||||
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
|
||||
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
|
||||
OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
|
||||
OPENBLAS_CMAKE_DIR := $(PREFIX)/cmake
|
||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
||||
|
||||
.PHONY : install
|
||||
@@ -46,11 +46,11 @@ ifndef NO_CBLAS
|
||||
endif
|
||||
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
@@ -86,8 +86,8 @@ ifeq ($(OSNAME), Darwin)
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@-cp $(LIBDLLNAME) $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
||||
@-cp $(LIBDLLNAME).a $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
@-cp $(LIBDLLNAME).a $(OPENBLAS_LIBRARY_DIR)
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
@@ -95,8 +95,7 @@ endif
|
||||
endif
|
||||
#Generating OpenBLASConfig.cmake
|
||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
ifndef NO_SHARED
|
||||
#ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.2.14
|
||||
VERSION = 0.2.13
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
@@ -114,9 +114,6 @@ NO_AFFINITY = 1
|
||||
# Support for IEEE quad precision(it's *real* REAL*16)( under testing)
|
||||
# QUAD_PRECISION = 1
|
||||
|
||||
# Support for integer matrix and vector (e.g. iaxpy)
|
||||
# INTEGER_PRECISION = 1
|
||||
|
||||
# Theads are still working for a while after finishing BLAS operation
|
||||
# to reduce thread activate/deactivate overhead. You can determine
|
||||
# time out to improve performance. This number should be from 4 to 30
|
||||
@@ -162,19 +159,6 @@ COMMON_PROF = -pg
|
||||
# Build Debug version
|
||||
# DEBUG = 1
|
||||
|
||||
# Improve GEMV and GER for small matrices by stack allocation.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/482
|
||||
#
|
||||
MAX_STACK_ALLOC=2048
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoid conflicts with other BLAS libraries, especially when using
|
||||
# 64 bit integer interfaces in OpenBLAS.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/459
|
||||
#
|
||||
# SYMBOLPREFIX=
|
||||
# SYMBOLSUFFIX=
|
||||
|
||||
#
|
||||
# End of user configuration
|
||||
#
|
||||
|
||||
@@ -23,7 +23,6 @@ CC = gcc
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CC = clang
|
||||
# EXTRALIB += -Wl,-no_compact_unwind
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -62,12 +61,6 @@ endif
|
||||
ifeq ($(TARGET), PILEDRIVER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), STEAMROLLER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), EXCAVATOR)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
@@ -92,12 +85,6 @@ endif
|
||||
ifeq ($(TARGET_CORE), PILEDRIVER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), STEAMROLLER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), EXCAVATOR)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
@@ -202,18 +189,12 @@ DLLWRAP = $(CROSS_SUFFIX)dllwrap
|
||||
OBJCOPY = $(CROSS_SUFFIX)objcopy
|
||||
OBJCONV = $(CROSS_SUFFIX)objconv
|
||||
|
||||
|
||||
# For detect fortran failed, only build BLAS.
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
NO_LAPACK = 1
|
||||
endif
|
||||
|
||||
#
|
||||
# OS dependent settings
|
||||
#
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.6
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.2
|
||||
MD5SUM = md5 -r
|
||||
endif
|
||||
|
||||
@@ -309,10 +290,6 @@ CCOMMON_OPT += -DQUAD_PRECISION
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifdef INTEGER_PRECISION
|
||||
CCOMMON_OPT += -DINTEGER_PRECISION
|
||||
endif
|
||||
|
||||
ifneq ($(ARCH), x86)
|
||||
ifneq ($(ARCH), x86_64)
|
||||
NO_EXPRECISION = 1
|
||||
@@ -328,10 +305,6 @@ ifdef SANITY_CHECK
|
||||
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
|
||||
endif
|
||||
|
||||
ifdef MAX_STACK_ALLOC
|
||||
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
|
||||
endif
|
||||
|
||||
#
|
||||
# Architecture dependent settings
|
||||
#
|
||||
@@ -381,12 +354,6 @@ endif
|
||||
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
|
||||
#check
|
||||
ifeq ($(USE_THREAD), 0)
|
||||
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
|
||||
endif
|
||||
|
||||
# ifeq logical or. GCC or LSB
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
|
||||
CCOMMON_OPT += -fopenmp
|
||||
@@ -425,7 +392,7 @@ endif
|
||||
ifeq ($(ARCH), x86_64)
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
|
||||
endif
|
||||
ifneq ($(NO_AVX2), 1)
|
||||
DYNAMIC_CORE += HASWELL
|
||||
@@ -595,7 +562,7 @@ else
|
||||
FCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifdef USE_OPENMP
|
||||
FCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
endif
|
||||
@@ -607,14 +574,14 @@ ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifdef USE_OPENMP
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), FUJITSU)
|
||||
CCOMMON_OPT += -DF_INTERFACE_FUJITSU
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifdef USE_OPENMP
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
@@ -632,7 +599,7 @@ endif
|
||||
else
|
||||
FCOMMON_OPT += -q32
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifdef USE_OPENMP
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
@@ -650,7 +617,7 @@ FCOMMON_OPT += -tp p7-64
|
||||
else
|
||||
FCOMMON_OPT += -tp p7
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifdef USE_OPENMP
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
endif
|
||||
@@ -679,7 +646,7 @@ FCOMMON_OPT += -mabi=n32
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifdef USE_OPENMP
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
endif
|
||||
@@ -716,7 +683,7 @@ FCOMMON_OPT += -m64
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifdef USE_OPENMP
|
||||
FEXTRALIB += -lstdc++
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
@@ -764,14 +731,14 @@ FCOMMON_OPT += -m32
|
||||
else
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifdef USE_OPENMP
|
||||
FCOMMON_OPT += -xopenmp=parallel
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), COMPAQ)
|
||||
CCOMMON_OPT += -DF_INTERFACE_COMPAQ
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifdef USE_OPENMP
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -4,7 +4,6 @@ QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
IBLASOBJS_P = $(IBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
@@ -23,18 +22,12 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
||||
endif
|
||||
|
||||
ifdef INTEGER_PRECISION
|
||||
BLASOBJS += $(IBLASOBJS)
|
||||
BLASOBJS_P += $(IBLASOBJS_P)
|
||||
endif
|
||||
|
||||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
||||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
||||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
|
||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
||||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||
$(IBLASOBJS) $(IBLASOBJS_P) : override CFLAGS += -DINTEGER -UCOMPLEX
|
||||
|
||||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
@@ -42,7 +35,6 @@ $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(IBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
|
||||
libs :: $(BLASOBJS) $(COMMONOBJS)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
|
||||
@@ -60,7 +60,6 @@ Please read GotoBLAS_01Readme.txt
|
||||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
|
||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thank Werner Saar)
|
||||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
|
||||
|
||||
#### MIPS64:
|
||||
- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.
|
||||
|
||||
@@ -32,8 +32,6 @@ ISTANBUL
|
||||
BOBCAT
|
||||
BULLDOZER
|
||||
PILEDRIVER
|
||||
STEAMROLLER
|
||||
EXCAVATOR
|
||||
|
||||
c)VIA CPU:
|
||||
SSE_GENERIC
|
||||
@@ -64,11 +62,6 @@ SPARC
|
||||
SPARCV7
|
||||
|
||||
6.ARM CPU:
|
||||
CORTEXA15
|
||||
CORTEXA9
|
||||
ARMV7
|
||||
ARMV6
|
||||
ARMV5
|
||||
|
||||
7.ARM 64-bit CPU:
|
||||
ARMV8
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
for f in *.goto *.acml *.mkl *.atlas
|
||||
do
|
||||
if [ -f "$f" ]; then
|
||||
mv $f `echo $f|tr '.' '_'`.exe
|
||||
fi
|
||||
done
|
||||
|
||||
1019
benchmark/Makefile
1019
benchmark/Makefile
File diff suppressed because it is too large
Load Diff
196
benchmark/asum.c
196
benchmark/asum.c
@@ -1,196 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef ASUM
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define ASUM BLASFUNC(dzasum)
|
||||
#else
|
||||
#define ASUM BLASFUNC(scasum)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define ASUM BLASFUNC(dasum)
|
||||
#else
|
||||
#define ASUM BLASFUNC(sasum)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
FLOAT result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = ASUM (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops\n", 4. * (double)m / timeg * 1.e-6);
|
||||
#else
|
||||
fprintf(stderr, " %10.2f MFlops\n", 2. * (double)m / timeg * 1.e-6);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -114,7 +114,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
@@ -198,4 +198,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -71,14 +71,8 @@ double fabs(double);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
@@ -105,7 +99,6 @@ int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
static __inline double getmflops(int ratio, int m, double secs){
|
||||
|
||||
double mm = (double)m;
|
||||
@@ -124,7 +117,7 @@ static __inline double getmflops(int ratio, int m, double secs){
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
#ifndef COMPLEX
|
||||
char *trans[] = {"T", "N"};
|
||||
@@ -280,4 +273,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
201
benchmark/copy.c
201
benchmark/copy.c
@@ -1,201 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef COPY
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define COPY BLASFUNC(zcopy)
|
||||
#else
|
||||
#define COPY BLASFUNC(ccopy)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define COPY BLASFUNC(dcopy)
|
||||
#else
|
||||
#define COPY BLASFUNC(scopy)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
COPY (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT result;
|
||||
@@ -192,4 +192,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -139,12 +139,11 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
|
||||
FLOAT wkopt[4];
|
||||
char job='V';
|
||||
char jobr='N';
|
||||
char *p;
|
||||
|
||||
blasint m, i, j, info,lwork;
|
||||
@@ -203,9 +202,9 @@ int main(int argc, char *argv[]){
|
||||
lwork = -1;
|
||||
m=to;
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
@@ -227,16 +226,16 @@ int main(int argc, char *argv[]){
|
||||
|
||||
lwork = -1;
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
|
||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
@@ -258,4 +257,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,15 +118,14 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
char trans='N';
|
||||
blasint m, n, i, j;
|
||||
blasint m, i, j;
|
||||
int loops = 1;
|
||||
int has_param_n=0;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
@@ -163,61 +162,51 @@ int main(int argc, char *argv[]){
|
||||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
if ((p = getenv("OPENBLAS_PARAM_N"))) {
|
||||
n = atoi(p);
|
||||
has_param_n=1;
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
if ( has_param_n == 1 && n <= m )
|
||||
n=n;
|
||||
else
|
||||
n=m;
|
||||
|
||||
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg = time1/loops;
|
||||
timeg /= loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1);
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -209,4 +209,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -266,4 +266,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -35,19 +35,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#undef GER
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define GER BLASFUNC(zgeru)
|
||||
#else
|
||||
#define GER BLASFUNC(cgeru)
|
||||
#endif
|
||||
#else
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define GER BLASFUNC(dger)
|
||||
#else
|
||||
#define GER BLASFUNC(sger)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
@@ -115,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -221,5 +214,5 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
|
||||
218
benchmark/gesv.c
218
benchmark/gesv.c
@@ -1,218 +0,0 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
#undef GESV
|
||||
#undef GETRS
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
#define GESV BLASFUNC(qgesv)
|
||||
#elif defined(DOUBLE)
|
||||
#define GESV BLASFUNC(dgesv)
|
||||
#else
|
||||
#define GESV BLASFUNC(sgesv)
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
#define GESV BLASFUNC(xgesv)
|
||||
#elif defined(DOUBLE)
|
||||
#define GESV BLASFUNC(zgesv)
|
||||
#else
|
||||
#define GESV BLASFUNC(cgesv)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
blasint *ipiv;
|
||||
|
||||
blasint m, i, j, info;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
fprintf(stderr, " %dx%d : ", (int)m, (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
b[i + j * m * COMPSIZE] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (j = 0; j < m; ++j) {
|
||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||
b[i] += a[i + j * m * COMPSIZE];
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GESV (&m, &m, a, &m, ipiv, b, &m, &info);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
|
||||
|
||||
fprintf(stderr,
|
||||
"%10.2f MFlops %10.6f s\n",
|
||||
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*work;
|
||||
FLOAT wkopt[4];
|
||||
@@ -231,4 +231,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -107,7 +107,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -189,4 +189,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -205,4 +205,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -106,7 +106,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -188,4 +188,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -186,4 +186,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
blasint *ipiv;
|
||||
@@ -270,4 +270,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -52,11 +52,6 @@ C)
|
||||
awk '/MFlops/ { print $3,int($9) }'|tail --lines=+2
|
||||
;;
|
||||
|
||||
B)
|
||||
# Copy Benchmark
|
||||
awk '/MBytes/ { print $1,int($3) }'|tail --lines=+2
|
||||
;;
|
||||
|
||||
|
||||
*)
|
||||
awk '/MFlops/ { print $1,int($3) }'|tail --lines=+2
|
||||
|
||||
@@ -88,10 +88,6 @@ double fabs(double);
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
@@ -118,7 +114,7 @@ int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
#ifndef COMPLEX
|
||||
char *trans[] = {"T", "N"};
|
||||
@@ -282,5 +278,5 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
|
||||
202
benchmark/scal.c
202
benchmark/scal.c
@@ -1,202 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SCAL
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define SCAL BLASFUNC(zscal)
|
||||
#else
|
||||
#define SCAL BLASFUNC(cscal)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define SCAL BLASFUNC(dscal)
|
||||
#else
|
||||
#define SCAL BLASFUNC(sscal)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SCAL (&m, alpha, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops\n", 6. * (double)m / timeg * 1.e-6);
|
||||
#else
|
||||
fprintf(stderr, " %10.2f MFlops\n", 1. * (double)m / timeg * 1.e-6);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_cgemm(N,l):
|
||||
|
||||
A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j;
|
||||
B = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j;
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 8*N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_cgemm(i,LOOPS)
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_cgemv(N,l):
|
||||
|
||||
A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j;
|
||||
B = randn(N).astype('float32') + randn(N).astype('float32') * 1j;
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 8*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_cgemv(i,LOOPS)
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
from scipy.linalg.blas import daxpy
|
||||
|
||||
|
||||
def run_daxpy(N,l):
|
||||
|
||||
x = randn(N).astype('float64')
|
||||
y = randn(N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
y = daxpy(x,y, a=2.0 )
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N ) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%d" % (N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_daxpy(i,LOOPS)
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_ddot(N,l):
|
||||
|
||||
A = randn(N).astype('float64')
|
||||
B = randn(N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N ) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%d" % (N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_ddot(i,LOOPS)
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_deig(N,l):
|
||||
|
||||
A = randn(N,N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
la,v = numpy.linalg.eig(A)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 26.33 *N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_deig(i,LOOPS)
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_dgemm(N,l):
|
||||
|
||||
A = randn(N,N).astype('float64')
|
||||
B = randn(N,N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_dgemm(i,LOOPS)
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_dgemv(N,l):
|
||||
|
||||
A = randn(N,N).astype('float64')
|
||||
B = randn(N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_dgemv(i,LOOPS)
|
||||
|
||||
@@ -1,58 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
from scipy.linalg.lapack import dgesv
|
||||
|
||||
def run_dgesv(N,l):
|
||||
|
||||
a = randn(N,N).astype('float64')
|
||||
b = randn(N,N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
dgesv(a,b,1,1)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
|
||||
mflops = ( 2.0/3.0 *N*N*N + 2.0*N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_dgesv(i,LOOPS)
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_dsolve(N,l):
|
||||
|
||||
A = randn(N,N).astype('float64')
|
||||
B = randn(N,N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.linalg.solve(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2.0/3.0 *N*N*N + 2.0*N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_dsolve(i,LOOPS)
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_sdot(N,l):
|
||||
|
||||
A = randn(N).astype('float32')
|
||||
B = randn(N).astype('float32')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N ) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%d" % (N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_sdot(i,LOOPS)
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_sgemm(N,l):
|
||||
|
||||
A = randn(N,N).astype('float32')
|
||||
B = randn(N,N).astype('float32')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_sgemm(i,LOOPS)
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_sgemv(N,l):
|
||||
|
||||
A = randn(N,N).astype('float32')
|
||||
B = randn(N).astype('float32')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_sgemv(i,LOOPS)
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_zgemm(N,l):
|
||||
|
||||
A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j;
|
||||
B = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j;
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 8*N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_zgemm(i,LOOPS)
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_zgemv(N,l):
|
||||
|
||||
A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j;
|
||||
B = randn(N).astype('float64') + randn(N).astype('float64') * 1j;
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 8*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_zgemv(i,LOOPS)
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = single(rand(n,n)) + single(rand(n,n)) * 1i;
|
||||
B = single(rand(n,n)) + single(rand(n,n)) * 1i;
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = single(rand(n,n)) + single(rand(n,n)) * 1i;
|
||||
B = single(rand(n,1)) + single(rand(n,1)) * 1i;
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 4.0 * 2.0*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = double(rand(n,n));
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
[V,lambda] = eig(A);
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 26.33 *n*n*n ) *loops / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg );
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = double(rand(n,n));
|
||||
B = double(rand(n,n));
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = double(rand(n,n));
|
||||
B = double(rand(n,1));
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 2.0*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
@@ -1,59 +0,0 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = double(rand(n,n));
|
||||
B = double(rand(n,n));
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
x = linsolve(A,B);
|
||||
#x = A / B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
#r = norm(A*x - B)/norm(B)
|
||||
mflops = ( 2.0/3.0 *n*n*n + 2.0*n*n*n ) *loops / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg );
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = single(rand(n,n));
|
||||
B = single(rand(n,n));
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = single(rand(n,n));
|
||||
B = single(rand(n,1));
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 2.0*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = double(rand(n,n)) + double(rand(n,n)) * 1i;
|
||||
B = double(rand(n,n)) + double(rand(n,n)) * 1i;
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
@@ -1,56 +0,0 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = double(rand(n,n)) + double(rand(n,n)) * 1i;
|
||||
B = double(rand(n,1)) + double(rand(n,1)) * 1i;
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 4.0 * 2.0*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
@@ -1,62 +0,0 @@
|
||||
#!/usr/bin/Rscript
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
nfrom = 128
|
||||
nto = 2048
|
||||
nstep = 128
|
||||
loops = 1
|
||||
|
||||
if ( length(argv) > 0 ) {
|
||||
|
||||
for ( z in 1:length(argv) ) {
|
||||
|
||||
if ( z == 1 ) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if ( z==2 ) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if ( z==3 ) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if ( z==4 ) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
||||
if ( p != "" ) {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n = nfrom
|
||||
while ( n <= nto ) {
|
||||
|
||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
|
||||
l = 1
|
||||
|
||||
start <- proc.time()[3]
|
||||
|
||||
while ( l <= loops ) {
|
||||
|
||||
ev <- eigen(A)
|
||||
l = l + 1
|
||||
}
|
||||
|
||||
end <- proc.time()[3]
|
||||
timeg = end - start
|
||||
mflops = (26.66 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
||||
|
||||
st = sprintf("%.0fx%.0f :",n , n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
||||
|
||||
n = n + nstep
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
#!/usr/bin/Rscript
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
nfrom = 128
|
||||
nto = 2048
|
||||
nstep = 128
|
||||
loops = 1
|
||||
|
||||
if ( length(argv) > 0 ) {
|
||||
|
||||
for ( z in 1:length(argv) ) {
|
||||
|
||||
if ( z == 1 ) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if ( z==2 ) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if ( z==3 ) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if ( z==4 ) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
||||
if ( p != "" ) {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n = nfrom
|
||||
while ( n <= nto ) {
|
||||
|
||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
|
||||
l = 1
|
||||
|
||||
start <- proc.time()[3]
|
||||
|
||||
while ( l <= loops ) {
|
||||
|
||||
C <- A %*% B
|
||||
l = l + 1
|
||||
}
|
||||
|
||||
end <- proc.time()[3]
|
||||
timeg = end - start
|
||||
mflops = ( 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
||||
|
||||
st = sprintf("%.0fx%.0f :",n , n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
||||
|
||||
n = n + nstep
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -1,63 +0,0 @@
|
||||
#!/usr/bin/Rscript
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
nfrom = 128
|
||||
nto = 2048
|
||||
nstep = 128
|
||||
loops = 1
|
||||
|
||||
if ( length(argv) > 0 ) {
|
||||
|
||||
for ( z in 1:length(argv) ) {
|
||||
|
||||
if ( z == 1 ) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if ( z==2 ) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if ( z==3 ) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if ( z==4 ) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
||||
if ( p != "" ) {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n = nfrom
|
||||
while ( n <= nto ) {
|
||||
|
||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
|
||||
l = 1
|
||||
|
||||
start <- proc.time()[3]
|
||||
|
||||
while ( l <= loops ) {
|
||||
|
||||
solve(A,B)
|
||||
l = l + 1
|
||||
}
|
||||
|
||||
end <- proc.time()[3]
|
||||
timeg = end - start
|
||||
mflops = (2.0/3.0 *n*n*n + 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
||||
|
||||
st = sprintf("%.0fx%.0f :",n , n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
||||
|
||||
n = n + nstep
|
||||
|
||||
}
|
||||
|
||||
|
||||
201
benchmark/swap.c
201
benchmark/swap.c
@@ -1,201 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above swapright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above swapright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE SWAPRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SWAP
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define SWAP BLASFUNC(zswap)
|
||||
#else
|
||||
#define SWAP BLASFUNC(cswap)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define SWAP BLASFUNC(dswap)
|
||||
#else
|
||||
#define SWAP BLASFUNC(sswap)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SWAP (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -200,4 +200,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -215,4 +215,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -200,4 +200,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -196,4 +196,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -199,4 +199,4 @@ int main(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -130,21 +130,11 @@ int main(int argc, char *argv[]){
|
||||
char trans='N';
|
||||
char diag ='U';
|
||||
|
||||
|
||||
int l;
|
||||
int loops = 1;
|
||||
double timeg;
|
||||
|
||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
@@ -160,7 +150,7 @@ int main(int argc, char *argv[]){
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c Loops = %d\n", from, to, step,side,uplo,trans,diag,loops);
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
@@ -181,39 +171,32 @@ int main(int argc, char *argv[]){
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0.0;
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
time1 = timeg/loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -1,196 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#define RETURN_BY_STACK 1
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef DOT
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define DOT BLASFUNC(zdotu)
|
||||
#else
|
||||
#define DOT BLASFUNC(cdotu)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT _Complex result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
DOT (&result, &m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
195
benchmark/zdot.c
195
benchmark/zdot.c
@@ -1,195 +0,0 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef DOT
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define DOT BLASFUNC(zdotu)
|
||||
#else
|
||||
#define DOT BLASFUNC(cdotu)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT _Complex result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = DOT (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
7
c_check
7
c_check
@@ -4,8 +4,6 @@
|
||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
||||
$hostarch = "x86_64" if ($hostarch eq "amd64");
|
||||
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
|
||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
||||
|
||||
$binary = $ENV{"BINARY"};
|
||||
|
||||
@@ -57,7 +55,6 @@ $os = osf if ($data =~ /OS_OSF/);
|
||||
$os = WINNT if ($data =~ /OS_WINNT/);
|
||||
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/);
|
||||
$os = Interix if ($data =~ /OS_INTERIX/);
|
||||
$os = Android if ($data =~ /OS_ANDROID/);
|
||||
|
||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
||||
@@ -84,10 +81,6 @@ if (($architecture eq "mips32") || ($architecture eq "mips64")) {
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if (($architecture eq "arm") || ($architecture eq "arm64")) {
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if ($architecture eq "alpha") {
|
||||
$defined = 1;
|
||||
$binary = 64;
|
||||
|
||||
16
cblas.h
16
cblas.h
@@ -13,12 +13,6 @@ extern "C" {
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the number of threads on runtime.*/
|
||||
int openblas_get_num_threads(void);
|
||||
|
||||
/*Get the number of physical processors (cores).*/
|
||||
int openblas_get_num_procs(void);
|
||||
|
||||
/*Get the build configure on runtime.*/
|
||||
char* openblas_get_config(void);
|
||||
|
||||
@@ -347,16 +341,6 @@ void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum
|
||||
void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
|
||||
void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
@@ -13,12 +13,6 @@ extern "C" {
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the number of threads on runtime.*/
|
||||
int openblas_get_num_threads(void);
|
||||
|
||||
/*Get the number of physical processors (cores).*/
|
||||
int openblas_get_num_procs(void);
|
||||
|
||||
/*Get the build configure on runtime.*/
|
||||
char* openblas_get_config(void);
|
||||
|
||||
@@ -333,16 +327,6 @@ void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, bl
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a,
|
||||
blasint clda, blasint cldb);
|
||||
|
||||
void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta,
|
||||
double *c, blasint cldc);
|
||||
void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta,
|
||||
double *c, blasint cldc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
21
common.h
21
common.h
@@ -93,10 +93,6 @@ extern "C" {
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
#ifdef OS_ANDROID
|
||||
#define NO_SYSV_IPC
|
||||
#endif
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
#ifdef ATOM
|
||||
#define GOTO_ATOM ATOM
|
||||
@@ -110,9 +106,7 @@ extern "C" {
|
||||
#endif
|
||||
#else
|
||||
#include <sys/mman.h>
|
||||
#ifndef NO_SYSV_IPC
|
||||
#include <sys/shm.h>
|
||||
#endif
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
@@ -276,11 +270,6 @@ typedef int blasint;
|
||||
#define SIZE 8
|
||||
#define BASE_SHIFT 3
|
||||
#define ZBASE_SHIFT 4
|
||||
#elif defined(INTEGER) //extend for integer matrix
|
||||
#define FLOAT int
|
||||
#define SIZE 4
|
||||
#define BASE_SHIFT 2
|
||||
#define ZBASE_SHIFT 3
|
||||
#else
|
||||
#define FLOAT float
|
||||
#define SIZE 4
|
||||
@@ -338,14 +327,6 @@ typedef int blasint;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifdef STEAMROLLER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifndef YIELDING
|
||||
#define YIELDING sched_yield()
|
||||
#endif
|
||||
@@ -510,8 +491,6 @@ void blas_set_parameter(void);
|
||||
int blas_get_cpu_number(void);
|
||||
void *blas_memory_alloc (int);
|
||||
void blas_memory_free (void *);
|
||||
void *blas_memory_alloc_nolock (int); //use malloc without blas_lock
|
||||
void blas_memory_free_nolock (void *);
|
||||
|
||||
int get_num_procs (void);
|
||||
|
||||
|
||||
52
common_arm.h
52
common_arm.h
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011-2015, The OpenBLAS Project
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,21 +30,50 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#ifndef COMMON_ARM
|
||||
#define COMMON_ARM
|
||||
|
||||
#if defined(ARMV5) || defined(ARMV6)
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
#else
|
||||
|
||||
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
|
||||
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
|
||||
|
||||
#endif
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#define RETURN_BY_COMPLEX
|
||||
@@ -59,12 +88,9 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: \n\t"
|
||||
"ldrex r2, [%1] \n\t"
|
||||
"mov r2, #0 \n\t"
|
||||
"strex r3, r2, [%1] \n\t"
|
||||
"cmp r3, #0 \n\t"
|
||||
"bne 1b \n\t"
|
||||
"mov %0 , r3 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011-2015, The OpenBLAS Project
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -30,12 +30,49 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#ifndef COMMON_ARM64
|
||||
#define COMMON_ARM64
|
||||
|
||||
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
|
||||
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
@@ -44,30 +81,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
/*
|
||||
int register ret;
|
||||
int register tmp;
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: \n\t"
|
||||
"ldaxr %2, [%1] \n\t"
|
||||
"mov %2, #0 \n\t"
|
||||
"stlxr %w0, %2, [%1] \n\t"
|
||||
"cbnz %w0, 1b \n\t"
|
||||
"mov %0 , #0 \n\t"
|
||||
: "=r"(ret), "=r"(address), "=r"(tmp)
|
||||
"ldrex r2, [%1] \n\t"
|
||||
"mov r2, #0 \n\t"
|
||||
"strex r3, r2, [%1] \n\t"
|
||||
"mov %0 , r3 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
: "memory", "%w0"
|
||||
//, "%r2" , "%r3"
|
||||
: "memory", "r2" , "r3"
|
||||
|
||||
|
||||
);
|
||||
|
||||
} while (ret);
|
||||
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
@@ -133,4 +166,3 @@ REALNAME:
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -220,7 +220,6 @@
|
||||
#define COMATCOPY_K_CTC comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC comatcopy_k_rtc
|
||||
|
||||
#define CGEADD_K cgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
@@ -403,7 +402,6 @@
|
||||
#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc
|
||||
#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc
|
||||
#define CGEADD_K gotoblas -> cgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -149,7 +149,6 @@
|
||||
#define DOMATCOPY_K_RN domatcopy_k_rn
|
||||
#define DOMATCOPY_K_CT domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT domatcopy_k_rt
|
||||
#define DGEADD_K dgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
@@ -268,8 +267,6 @@
|
||||
#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt
|
||||
|
||||
#define DGEADD_K gotoblas -> dgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
#define DGEMM_NN dgemm_nn
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
#ifndef COMMON_I_H
|
||||
#define COMMON_I_H
|
||||
|
||||
#ifndef DYNAMIC_ARCH
|
||||
#define IAXPYU_K iaxpy_k
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
#endif
|
||||
@@ -93,7 +93,6 @@ openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdo
|
||||
|
||||
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(iaxpy) (blasint *, int *, int *, blasint *, int *, blasint *);
|
||||
void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
@@ -755,12 +754,6 @@ void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, do
|
||||
void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *);
|
||||
void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *);
|
||||
|
||||
void BLASFUNC(sgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
|
||||
void BLASFUNC(dgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
|
||||
void BLASFUNC(cgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
|
||||
void BLASFUNC(zgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
|
||||
@@ -60,8 +60,6 @@ int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double,
|
||||
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
int iaxpy_k (BLASLONG, BLASLONG, BLASLONG, int,
|
||||
int *, BLASLONG, int *, BLASLONG, int *, BLASLONG);
|
||||
int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double,
|
||||
|
||||
@@ -1762,11 +1762,6 @@ int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, dou
|
||||
int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG);
|
||||
int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG);
|
||||
int cgeadd_k(BLASLONG, BLASLONG, float, float, float*, BLASLONG, float, float, float *, BLASLONG);
|
||||
int zgeadd_k(BLASLONG, BLASLONG, double,double, double*, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
}
|
||||
|
||||
@@ -47,10 +47,6 @@
|
||||
#include "common_z.h"
|
||||
#include "common_x.h"
|
||||
|
||||
#ifdef INTEGER_PRECISION
|
||||
#include "common_i.h"
|
||||
#endif
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
|
||||
@@ -638,10 +634,7 @@
|
||||
#define OMATCOPY_K_RN DOMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT DOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT DOMATCOPY_K_RT
|
||||
#define GEADD_K DGEADD_K
|
||||
|
||||
#elif defined(INTEGER)
|
||||
#define AXPYU_K IAXPYU_K
|
||||
#else
|
||||
|
||||
#define AMAX_K SAMAX_K
|
||||
@@ -939,7 +932,6 @@
|
||||
#define OMATCOPY_K_CT SOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT SOMATCOPY_K_RT
|
||||
|
||||
#define GEADD_K SGEADD_K
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
@@ -1754,7 +1746,6 @@
|
||||
#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC
|
||||
#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC
|
||||
#define GEADD_K ZGEADD_K
|
||||
|
||||
#else
|
||||
|
||||
@@ -2168,8 +2159,6 @@
|
||||
#define OMATCOPY_K_CTC COMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC COMATCOPY_K_RTC
|
||||
|
||||
#define GEADD_K CGEADD_K
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@@ -855,10 +855,6 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
|
||||
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
|
||||
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
|
||||
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
|
||||
int (*zgeadd_k) (BLASLONG, BLASLONG, float, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
} gotoblas_t;
|
||||
|
||||
|
||||
@@ -153,7 +153,6 @@
|
||||
#define SOMATCOPY_K_CT somatcopy_k_ct
|
||||
#define SOMATCOPY_K_RT somatcopy_k_rt
|
||||
|
||||
#define SGEADD_K sgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
@@ -275,7 +274,6 @@
|
||||
#define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct
|
||||
#define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt
|
||||
|
||||
#define SGEADD_K gotoblas -> sgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -65,7 +65,6 @@ extern int blas_omp_linked;
|
||||
#define BLAS_XDOUBLE 0x0002U
|
||||
#define BLAS_REAL 0x0000U
|
||||
#define BLAS_COMPLEX 0x0004U
|
||||
#define BLAS_INTEGER 0x0008U
|
||||
|
||||
#define BLAS_TRANSA 0x0030U /* 2bit */
|
||||
#define BLAS_TRANSA_N 0x0000U
|
||||
|
||||
@@ -171,7 +171,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
#define MMXSTORE movd
|
||||
#endif
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
|
||||
//Enable some optimazation for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
#endif
|
||||
|
||||
@@ -226,7 +226,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
|
||||
#ifdef ASSEMBLER
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
|
||||
//Enable some optimazation for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
#endif
|
||||
|
||||
@@ -220,7 +220,6 @@
|
||||
#define ZOMATCOPY_K_CTC zomatcopy_k_ctc
|
||||
#define ZOMATCOPY_K_RTC zomatcopy_k_rtc
|
||||
|
||||
#define ZGEADD_K zgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
@@ -404,8 +403,6 @@
|
||||
#define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc
|
||||
#define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc
|
||||
|
||||
#define ZGEADD_K gotoblas -> zgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
#define ZGEMM_NN zgemm_nn
|
||||
|
||||
12
cpuid.h
12
cpuid.h
@@ -104,12 +104,10 @@
|
||||
#define CORE_ATOM 18
|
||||
#define CORE_NANO 19
|
||||
#define CORE_SANDYBRIDGE 20
|
||||
#define CORE_BOBCAT 21
|
||||
#define CORE_BULLDOZER 22
|
||||
#define CORE_BOBCAT 21
|
||||
#define CORE_BULLDOZER 22
|
||||
#define CORE_PILEDRIVER 23
|
||||
#define CORE_HASWELL 24
|
||||
#define CORE_STEAMROLLER 25
|
||||
#define CORE_EXCAVATOR 26
|
||||
#define CORE_HASWELL 24
|
||||
|
||||
#define HAVE_SSE (1 << 0)
|
||||
#define HAVE_SSE2 (1 << 1)
|
||||
@@ -202,8 +200,6 @@ typedef struct {
|
||||
#define CPUTYPE_BOBCAT 45
|
||||
#define CPUTYPE_BULLDOZER 46
|
||||
#define CPUTYPE_PILEDRIVER 47
|
||||
#define CPUTYPE_HASWELL 48
|
||||
#define CPUTYPE_STEAMROLLER 49
|
||||
#define CPUTYPE_EXCAVATOR 50
|
||||
#define CPUTYPE_HASWELL 48
|
||||
|
||||
#endif
|
||||
|
||||
95
cpuid_arm.c
95
cpuid_arm.c
@@ -30,27 +30,16 @@
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_ARMV6 1
|
||||
#define CPU_ARMV7 2
|
||||
#define CPU_CORTEXA9 3
|
||||
#define CPU_CORTEXA15 4
|
||||
#define CPU_CORTEXA15 3
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKOWN",
|
||||
"ARMV6",
|
||||
"ARMV7",
|
||||
"CORTEXA9",
|
||||
"CORTEXA15"
|
||||
};
|
||||
|
||||
|
||||
static char *cpuname_lower[] = {
|
||||
"unknown",
|
||||
"armv6",
|
||||
"armv7",
|
||||
"cortexa9",
|
||||
"cortexa15"
|
||||
};
|
||||
|
||||
|
||||
int get_feature(char *search)
|
||||
{
|
||||
|
||||
@@ -96,29 +85,6 @@ int detect(void)
|
||||
char buffer[512], *p;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("CPU part", buffer, 8))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
if(p != NULL) {
|
||||
if (strstr(p, "0xc09")) {
|
||||
return CPU_CORTEXA9;
|
||||
}
|
||||
if (strstr(p, "0xc0f")) {
|
||||
return CPU_CORTEXA15;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
p = (char *) NULL ;
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
@@ -176,7 +142,21 @@ void get_architecture(void)
|
||||
void get_subarchitecture(void)
|
||||
{
|
||||
int d = detect();
|
||||
printf("%s", cpuname[d]);
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("ARMV7");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("ARMV6");
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("UNKNOWN");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void get_subdirname(void)
|
||||
@@ -190,36 +170,6 @@ void get_cpuconfig(void)
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
case CPU_CORTEXA9:
|
||||
printf("#define CORTEXA9\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define HAVE_VFPV3\n");
|
||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
||||
if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 128\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
case CPU_CORTEXA15:
|
||||
printf("#define CORTEXA15\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define HAVE_VFPV3\n");
|
||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
||||
if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 128\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("#define ARMV7\n");
|
||||
@@ -256,7 +206,18 @@ void get_libname(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
printf("%s", cpuname_lower[d]);
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("armv7\n");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("armv6\n");
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
111
cpuid_x86.c
111
cpuid_x86.c
@@ -1098,16 +1098,6 @@ int get_cpuname(void){
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 13:
|
||||
//Broadwell
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
@@ -1122,36 +1112,11 @@ int get_cpuname(void){
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 7:
|
||||
case 15:
|
||||
//Broadwell
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
switch (model) {
|
||||
case 6:
|
||||
//Broadwell
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x7:
|
||||
@@ -1197,21 +1162,6 @@ int get_cpuname(void){
|
||||
return CPUTYPE_PILEDRIVER;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
case 0:
|
||||
switch(exmodel){
|
||||
case 3:
|
||||
if(support_avx())
|
||||
return CPUTYPE_STEAMROLLER;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
|
||||
case 6:
|
||||
if(support_avx())
|
||||
return CPUTYPE_EXCAVATOR;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
@@ -1340,8 +1290,6 @@ static char *cpuname[] = {
|
||||
"BULLDOZER",
|
||||
"PILEDRIVER",
|
||||
"HASWELL",
|
||||
"STEAMROLLER",
|
||||
"EXCAVATOR",
|
||||
};
|
||||
|
||||
static char *lowercpuname[] = {
|
||||
@@ -1393,8 +1341,6 @@ static char *lowercpuname[] = {
|
||||
"bulldozer",
|
||||
"piledriver",
|
||||
"haswell",
|
||||
"steamroller",
|
||||
"excavator",
|
||||
};
|
||||
|
||||
static char *corename[] = {
|
||||
@@ -1423,8 +1369,6 @@ static char *corename[] = {
|
||||
"BULLDOZER",
|
||||
"PILEDRIVER",
|
||||
"HASWELL",
|
||||
"STEAMROLLER",
|
||||
"EXCAVATOR",
|
||||
};
|
||||
|
||||
static char *corename_lower[] = {
|
||||
@@ -1453,8 +1397,6 @@ static char *corename_lower[] = {
|
||||
"bulldozer",
|
||||
"piledriver",
|
||||
"haswell",
|
||||
"steamroller",
|
||||
"excavator",
|
||||
};
|
||||
|
||||
|
||||
@@ -1573,16 +1515,6 @@ int get_coretype(void){
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
case 13:
|
||||
//broadwell
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
@@ -1597,36 +1529,11 @@ int get_coretype(void){
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
case 7:
|
||||
case 15:
|
||||
//broadwell
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
switch (model) {
|
||||
case 6:
|
||||
//broadwell
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -1655,25 +1562,7 @@ int get_coretype(void){
|
||||
return CORE_PILEDRIVER;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
|
||||
case 0:
|
||||
switch(exmodel){
|
||||
case 3:
|
||||
if(support_avx())
|
||||
return CORE_STEAMROLLER;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
|
||||
case 6:
|
||||
if(support_avx())
|
||||
return CORE_EXCAVATOR;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}else return CORE_BARCELONA;
|
||||
}
|
||||
}
|
||||
|
||||
4
ctest.c
4
ctest.c
@@ -44,10 +44,6 @@ COMPILER_DEC
|
||||
COMPILER_GNU
|
||||
#endif
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
OS_ANDROID
|
||||
#endif
|
||||
|
||||
#if defined(__linux__)
|
||||
OS_LINUX
|
||||
#endif
|
||||
|
||||
@@ -27,18 +27,12 @@ ctestl2o = c_cblas2.o c_c2chke.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
ctestl3o = c_cblas3.o c_c3chke.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
ctestl3o_3m = c_cblas3_3m.o c_c3chke_3m.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
ztestl1o = c_zblas1.o
|
||||
|
||||
ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
ztestl3o_3m = c_zblas3_3m.o c_z3chke_3m.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
|
||||
|
||||
all :: all1 all2 all3
|
||||
|
||||
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
|
||||
@@ -121,8 +115,8 @@ xccblat2: $(ctestl2o) c_cblat2.o $(TOPDIR)/$(LIBNAME)
|
||||
xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xccblat3 c_cblat3.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
xccblat3_3m: $(ctestl3o) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
# Double complex
|
||||
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME)
|
||||
@@ -133,8 +127,8 @@ xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xzcblat3 c_zblat3.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
|
||||
xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
xzcblat3_3m: $(ztestl3o) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
|
||||
include $(TOPDIR)/Makefile.tail
|
||||
|
||||
230
ctest/c_c3chke.c
230
ctest/c_c3chke.c
@@ -46,7 +46,235 @@ void F77_c3chke(char * rout) {
|
||||
}
|
||||
|
||||
|
||||
if (strncmp( sf,"cblas_cgemm" ,11)==0) {
|
||||
if (strncmp( sf,"cblas_cgemm3m" ,13)==0) {
|
||||
cblas_rout = "cblas_cgemm3" ;
|
||||
|
||||
cblas_info = 1;
|
||||
cblas_cgemm3m( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_cgemm3m( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_cgemm3m( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_cgemm3m( INVALID, CblasTrans, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, INVALID, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, INVALID, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, INVALID, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
|
||||
} else if (strncmp( sf,"cblas_cgemm" ,11)==0) {
|
||||
cblas_rout = "cblas_cgemm" ;
|
||||
|
||||
|
||||
|
||||
1936
ctest/c_c3chke_3m.c
1936
ctest/c_c3chke_3m.c
File diff suppressed because it is too large
Load Diff
@@ -567,3 +567,81 @@ void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
|
||||
|
||||
|
||||
void F77_cgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
|
||||
int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
|
||||
CBLAS_TEST_COMPLEX *c, int *ldc ) {
|
||||
|
||||
CBLAS_TEST_COMPLEX *A, *B, *C;
|
||||
int i,j,LDA, LDB, LDC;
|
||||
enum CBLAS_TRANSPOSE transa, transb;
|
||||
|
||||
get_transpose_type(transpa, &transa);
|
||||
get_transpose_type(transpb, &transb);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (transa == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
else {
|
||||
LDA = *m+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*m; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
|
||||
if (transb == CblasNoTrans) {
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_COMPLEX) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
else {
|
||||
LDB = *k+1;
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_cgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
|
||||
B, LDB, beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
c[j*(*ldc)+i].real=C[i*LDC+j].real;
|
||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
|
||||
}
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_cgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
else
|
||||
cblas_cgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,647 +0,0 @@
|
||||
/*
|
||||
* Written by D.P. Manley, Digital Equipment Corporation.
|
||||
* Prefixed "C_" to BLAS routines and their declarations.
|
||||
*
|
||||
* Modified by T. H. Do, 4/15/98, SGI/CRAY Research.
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
#include "common.h"
|
||||
#include "cblas_test.h"
|
||||
|
||||
#define TEST_COL_MJR 0
|
||||
#define TEST_ROW_MJR 1
|
||||
#define UNDEFINED -1
|
||||
|
||||
void F77_cgemm(int *order, char *transpa, char *transpb, int *m, int *n,
|
||||
int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
|
||||
CBLAS_TEST_COMPLEX *c, int *ldc ) {
|
||||
|
||||
CBLAS_TEST_COMPLEX *A, *B, *C;
|
||||
int i,j,LDA, LDB, LDC;
|
||||
enum CBLAS_TRANSPOSE transa, transb;
|
||||
|
||||
get_transpose_type(transpa, &transa);
|
||||
get_transpose_type(transpb, &transb);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (transa == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
else {
|
||||
LDA = *m+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*m; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
|
||||
if (transb == CblasNoTrans) {
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_COMPLEX) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
else {
|
||||
LDB = *k+1;
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_cgemm( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
|
||||
B, LDB, beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
c[j*(*ldc)+i].real=C[i*LDC+j].real;
|
||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
|
||||
}
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_cgemm( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
else
|
||||
cblas_cgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
}
|
||||
|
||||
void F77_chemm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
|
||||
CBLAS_TEST_COMPLEX *c, int *ldc ) {
|
||||
|
||||
CBLAS_TEST_COMPLEX *A, *B, *C;
|
||||
int i,j,LDA, LDB, LDC;
|
||||
enum CBLAS_UPLO uplo;
|
||||
enum CBLAS_SIDE side;
|
||||
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_side_type(rtlf,&side);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A= (CBLAS_TEST_COMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc( (*m)*LDB*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_chemm( CblasRowMajor, side, uplo, *m, *n, alpha, A, LDA, B, LDB,
|
||||
beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
c[j*(*ldc)+i].real=C[i*LDC+j].real;
|
||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
|
||||
}
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_chemm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
|
||||
beta, c, *ldc );
|
||||
else
|
||||
cblas_chemm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
|
||||
beta, c, *ldc );
|
||||
}
|
||||
void F77_csymm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
|
||||
CBLAS_TEST_COMPLEX *c, int *ldc ) {
|
||||
|
||||
CBLAS_TEST_COMPLEX *A, *B, *C;
|
||||
int i,j,LDA, LDB, LDC;
|
||||
enum CBLAS_UPLO uplo;
|
||||
enum CBLAS_SIDE side;
|
||||
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_side_type(rtlf,&side);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_COMPLEX ));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
cblas_csymm( CblasRowMajor, side, uplo, *m, *n, alpha, A, LDA, B, LDB,
|
||||
beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ )
|
||||
c[j*(*ldc)+i]=C[i*LDC+j];
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_csymm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
|
||||
beta, c, *ldc );
|
||||
else
|
||||
cblas_csymm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb,
|
||||
beta, c, *ldc );
|
||||
}
|
||||
|
||||
void F77_cherk(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
float *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
float *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) {
|
||||
|
||||
int i,j,LDA,LDC;
|
||||
CBLAS_TEST_COMPLEX *A, *C;
|
||||
enum CBLAS_UPLO uplo;
|
||||
enum CBLAS_TRANSPOSE trans;
|
||||
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_transpose_type(transp,&trans);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (trans == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDC*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_cherk(CblasRowMajor, uplo, trans, *n, *k, *alpha, A, LDA, *beta,
|
||||
C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*n; i++ ) {
|
||||
c[j*(*ldc)+i].real=C[i*LDC+j].real;
|
||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
|
||||
}
|
||||
free(A);
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_cherk(CblasColMajor, uplo, trans, *n, *k, *alpha, a, *lda, *beta,
|
||||
c, *ldc );
|
||||
else
|
||||
cblas_cherk(UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda, *beta,
|
||||
c, *ldc );
|
||||
}
|
||||
|
||||
void F77_csyrk(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) {
|
||||
|
||||
int i,j,LDA,LDC;
|
||||
CBLAS_TEST_COMPLEX *A, *C;
|
||||
enum CBLAS_UPLO uplo;
|
||||
enum CBLAS_TRANSPOSE trans;
|
||||
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_transpose_type(transp,&trans);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (trans == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDC*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_csyrk(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA, beta,
|
||||
C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*n; i++ ) {
|
||||
c[j*(*ldc)+i].real=C[i*LDC+j].real;
|
||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
|
||||
}
|
||||
free(A);
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_csyrk(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda, beta,
|
||||
c, *ldc );
|
||||
else
|
||||
cblas_csyrk(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, beta,
|
||||
c, *ldc );
|
||||
}
|
||||
void F77_cher2k(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *b, int *ldb, float *beta,
|
||||
CBLAS_TEST_COMPLEX *c, int *ldc ) {
|
||||
int i,j,LDA,LDB,LDC;
|
||||
CBLAS_TEST_COMPLEX *A, *B, *C;
|
||||
enum CBLAS_UPLO uplo;
|
||||
enum CBLAS_TRANSPOSE trans;
|
||||
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_transpose_type(transp,&trans);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (trans == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
LDB = *k+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ));
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDB*sizeof(CBLAS_TEST_COMPLEX ));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
else {
|
||||
LDA = *n+1;
|
||||
LDB = *n+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc( LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc( LDB*(*k)*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_COMPLEX* )malloc( (*n)*LDC*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_cher2k(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA,
|
||||
B, LDB, *beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*n; i++ ) {
|
||||
c[j*(*ldc)+i].real=C[i*LDC+j].real;
|
||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
|
||||
}
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_cher2k(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, *beta, c, *ldc );
|
||||
else
|
||||
cblas_cher2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, *beta, c, *ldc );
|
||||
}
|
||||
void F77_csyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
|
||||
CBLAS_TEST_COMPLEX *c, int *ldc ) {
|
||||
int i,j,LDA,LDB,LDC;
|
||||
CBLAS_TEST_COMPLEX *A, *B, *C;
|
||||
enum CBLAS_UPLO uplo;
|
||||
enum CBLAS_TRANSPOSE trans;
|
||||
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_transpose_type(transp,&trans);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (trans == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
LDB = *k+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDB*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
else {
|
||||
LDA = *n+1;
|
||||
LDB = *n+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX));
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*k)*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_COMPLEX* )malloc( (*n)*LDC*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_csyr2k(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA,
|
||||
B, LDB, beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*n; i++ ) {
|
||||
c[j*(*ldc)+i].real=C[i*LDC+j].real;
|
||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
|
||||
}
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_csyr2k(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
else
|
||||
cblas_csyr2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
}
|
||||
void F77_ctrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
|
||||
int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) {
|
||||
int i,j,LDA,LDB;
|
||||
CBLAS_TEST_COMPLEX *A, *B;
|
||||
enum CBLAS_SIDE side;
|
||||
enum CBLAS_DIAG diag;
|
||||
enum CBLAS_UPLO uplo;
|
||||
enum CBLAS_TRANSPOSE trans;
|
||||
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_transpose_type(transp,&trans);
|
||||
get_diag_type(diagn,&diag);
|
||||
get_side_type(rtlf,&side);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
cblas_ctrmm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
|
||||
A, LDA, B, LDB );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
b[j*(*ldb)+i].real=B[i*LDB+j].real;
|
||||
b[j*(*ldb)+i].imag=B[i*LDB+j].imag;
|
||||
}
|
||||
free(A);
|
||||
free(B);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_ctrmm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
|
||||
a, *lda, b, *ldb);
|
||||
else
|
||||
cblas_ctrmm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
|
||||
a, *lda, b, *ldb);
|
||||
}
|
||||
|
||||
void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
|
||||
int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) {
|
||||
int i,j,LDA,LDB;
|
||||
CBLAS_TEST_COMPLEX *A, *B;
|
||||
enum CBLAS_SIDE side;
|
||||
enum CBLAS_DIAG diag;
|
||||
enum CBLAS_UPLO uplo;
|
||||
enum CBLAS_TRANSPOSE trans;
|
||||
|
||||
get_uplo_type(uplow,&uplo);
|
||||
get_transpose_type(transp,&trans);
|
||||
get_diag_type(diagn,&diag);
|
||||
get_side_type(rtlf,&side);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc( (*m)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
cblas_ctrsm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
|
||||
A, LDA, B, LDB );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
b[j*(*ldb)+i].real=B[i*LDB+j].real;
|
||||
b[j*(*ldb)+i].imag=B[i*LDB+j].imag;
|
||||
}
|
||||
free(A);
|
||||
free(B);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_ctrsm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
|
||||
a, *lda, b, *ldb);
|
||||
else
|
||||
cblas_ctrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
|
||||
a, *lda, b, *ldb);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void F77_cgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
|
||||
int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
|
||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
|
||||
CBLAS_TEST_COMPLEX *c, int *ldc ) {
|
||||
|
||||
CBLAS_TEST_COMPLEX *A, *B, *C;
|
||||
int i,j,LDA, LDB, LDC;
|
||||
enum CBLAS_TRANSPOSE transa, transb;
|
||||
|
||||
get_transpose_type(transpa, &transa);
|
||||
get_transpose_type(transpb, &transb);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (transa == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
else {
|
||||
LDA = *m+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*m; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
|
||||
if (transb == CblasNoTrans) {
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_COMPLEX) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
else {
|
||||
LDB = *k+1;
|
||||
B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_cgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
|
||||
B, LDB, beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
c[j*(*ldc)+i].real=C[i*LDC+j].real;
|
||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
|
||||
}
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_cgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
else
|
||||
cblas_cgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
}
|
||||
|
||||
|
||||
232
ctest/c_z3chke.c
232
ctest/c_z3chke.c
@@ -49,7 +49,237 @@ void F77_z3chke(char * rout) {
|
||||
|
||||
|
||||
|
||||
if (strncmp( sf,"cblas_zgemm" ,11)==0) {
|
||||
if (strncmp( sf,"cblas_zgemm3m" ,13)==0) {
|
||||
cblas_rout = "cblas_zgemm3" ;
|
||||
|
||||
cblas_info = 1;
|
||||
cblas_zgemm3m( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_zgemm3m( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_zgemm3m( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_zgemm3m( INVALID, CblasTrans, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, INVALID, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, INVALID, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasTrans, INVALID, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_zgemm3m( CblasColMajor, CblasTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_zgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
|
||||
|
||||
|
||||
} else if (strncmp( sf,"cblas_zgemm" ,11)==0) {
|
||||
cblas_rout = "cblas_zgemm" ;
|
||||
|
||||
cblas_info = 1;
|
||||
|
||||
1940
ctest/c_z3chke_3m.c
1940
ctest/c_z3chke_3m.c
File diff suppressed because it is too large
Load Diff
@@ -564,3 +564,80 @@ void F77_ztrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
||||
}
|
||||
|
||||
|
||||
void F77_zgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
|
||||
int *k, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda,
|
||||
CBLAS_TEST_ZOMPLEX *b, int *ldb, CBLAS_TEST_ZOMPLEX *beta,
|
||||
CBLAS_TEST_ZOMPLEX *c, int *ldc ) {
|
||||
|
||||
CBLAS_TEST_ZOMPLEX *A, *B, *C;
|
||||
int i,j,LDA, LDB, LDC;
|
||||
enum CBLAS_TRANSPOSE transa, transb;
|
||||
|
||||
get_transpose_type(transpa, &transa);
|
||||
get_transpose_type(transpb, &transb);
|
||||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
if (transa == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
else {
|
||||
LDA = *m+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*m; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag;
|
||||
}
|
||||
}
|
||||
|
||||
if (transb == CblasNoTrans) {
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_ZOMPLEX) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
else {
|
||||
LDB = *k+1;
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
}
|
||||
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag;
|
||||
}
|
||||
cblas_zgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA,
|
||||
B, LDB, beta, C, LDC );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
c[j*(*ldc)+i].real=C[i*LDC+j].real;
|
||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag;
|
||||
}
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
}
|
||||
else if (*order == TEST_COL_MJR)
|
||||
cblas_zgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
else
|
||||
cblas_zgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
|
||||
b, *ldb, beta, c, *ldc );
|
||||
}
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user