Compare commits
201 Commits
v0.2.12
...
integer_da
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
034ffa93fa | ||
|
|
3f1b57668e | ||
|
|
d8f18d32c3 | ||
|
|
bdb5c842fc | ||
|
|
e7c969e164 | ||
|
|
9bd962f655 | ||
|
|
4f5691e5c0 | ||
|
|
29293160a4 | ||
|
|
3e33afef2e | ||
|
|
8614057ea9 | ||
|
|
7f375f9e8f | ||
|
|
69c5169e7d | ||
|
|
e19948baa1 | ||
|
|
a2eaf234fc | ||
|
|
6a13a94e71 | ||
|
|
eff43d3289 | ||
|
|
9c4817d07b | ||
|
|
319f3a0451 | ||
|
|
02c7766f68 | ||
|
|
f38cb67ca8 | ||
|
|
eea2e30b74 | ||
|
|
19b8fd2aed | ||
|
|
0cc5212741 | ||
|
|
c47c8e8cf5 | ||
|
|
a11555c715 | ||
|
|
897d03518e | ||
|
|
23fbc5728e | ||
|
|
6d40fa587f | ||
|
|
22dcd79959 | ||
|
|
ea4df0aad3 | ||
|
|
e127fb8fd8 | ||
|
|
7fb718a7d8 | ||
|
|
24f58c8bb1 | ||
|
|
95b1faf667 | ||
|
|
2d9e406050 | ||
|
|
59083e3ce1 | ||
|
|
685be40339 | ||
|
|
31c9e399e9 | ||
|
|
7de6bb9889 | ||
|
|
d63034303b | ||
|
|
51ff17d46e | ||
|
|
905534942a | ||
|
|
18e90ee2e3 | ||
|
|
e00cccc41e | ||
|
|
73f09bf64f | ||
|
|
02e772c7e4 | ||
|
|
7aee913991 | ||
|
|
e50a933037 | ||
|
|
5f9011d6ef | ||
|
|
ebb9eba987 | ||
|
|
8e5a1083bb | ||
|
|
6743beb748 | ||
|
|
bcabf72c08 | ||
|
|
cda29f183b | ||
|
|
e52d36450a | ||
|
|
f8f2e261fe | ||
|
|
be3c843700 | ||
|
|
e6f57db846 | ||
|
|
9bfd267d51 | ||
|
|
924bc5372e | ||
|
|
2b83a69650 | ||
|
|
133c11a156 | ||
|
|
30f52d53df | ||
|
|
a124637329 | ||
|
|
642aaba2e0 | ||
|
|
4c616173e4 | ||
|
|
5e83d80725 | ||
|
|
b2e1797dc6 | ||
|
|
e216f686cb | ||
|
|
e42652f772 | ||
|
|
e77db2af31 | ||
|
|
37b00841ac | ||
|
|
fc0e0391f3 | ||
|
|
da0f27b9ac | ||
|
|
c22068c406 | ||
|
|
dee100d0e4 | ||
|
|
0273966abb | ||
|
|
3a67daa954 | ||
|
|
ab567d8443 | ||
|
|
3c09cea4b2 | ||
|
|
b4f2153dcd | ||
|
|
1c4b0eeae3 | ||
|
|
406d9d64e9 | ||
|
|
1bec9abb9a | ||
|
|
3814bf60d3 | ||
|
|
847e19c04e | ||
|
|
46c7b4d5c8 | ||
|
|
8e05d291b5 | ||
|
|
9da555e5f7 | ||
|
|
6d0db0151f | ||
|
|
37b9033c90 | ||
|
|
59e7a518c6 | ||
|
|
13889515b3 | ||
|
|
248c9340c3 | ||
|
|
e9f33b4ca7 | ||
|
|
f5d847122a | ||
|
|
a4c96eca67 | ||
|
|
fb02cb0a41 | ||
|
|
baa0363ea2 | ||
|
|
34ba66606a | ||
|
|
f615dc7603 | ||
|
|
331c417637 | ||
|
|
6c3a0b5d46 | ||
|
|
fd9fd42936 | ||
|
|
9798481979 | ||
|
|
d7a17ad85d | ||
|
|
d35f6c63c2 | ||
|
|
166d76e864 | ||
|
|
f9f127d838 | ||
|
|
62231ab337 | ||
|
|
3119def9a7 | ||
|
|
33b332372a | ||
|
|
fd838c75bc | ||
|
|
b57a60dac8 | ||
|
|
5c51163972 | ||
|
|
9299d8cfd6 | ||
|
|
0a3d3b945d | ||
|
|
4f680a7d61 | ||
|
|
ba926e807c | ||
|
|
60c6dec6e6 | ||
|
|
47898cca35 | ||
|
|
53bb924287 | ||
|
|
1e80b8b0d3 | ||
|
|
a901b065d3 | ||
|
|
3937e2a0a0 | ||
|
|
9707d608d5 | ||
|
|
701b9d7556 | ||
|
|
8977b3f235 | ||
|
|
f6426395ea | ||
|
|
0ac787eefe | ||
|
|
e5b96e55a7 | ||
|
|
a3491e1e88 | ||
|
|
e81a5d61e4 | ||
|
|
c674fa32be | ||
|
|
e34911a73d | ||
|
|
76dcaf2281 | ||
|
|
770fac92eb | ||
|
|
e95d64333a | ||
|
|
75c40bcc48 | ||
|
|
b62f9f4120 | ||
|
|
b6438dedea | ||
|
|
cdefdb21cd | ||
|
|
ea7f9dacf4 | ||
|
|
bf5dbb7e2a | ||
|
|
39cc6b21d3 | ||
|
|
771b18ae9c | ||
|
|
cfa9392ffa | ||
|
|
1ccd57ce80 | ||
|
|
65a847cd36 | ||
|
|
07ff001981 | ||
|
|
b17ccb4c5c | ||
|
|
63c6fcfa0a | ||
|
|
29cb47fc06 | ||
|
|
4e6c4046f7 | ||
|
|
229ce2ccd1 | ||
|
|
ef75be0e51 | ||
|
|
5344f335a8 | ||
|
|
5cb5af9333 | ||
|
|
41aad0407f | ||
|
|
f8f2e84659 | ||
|
|
34633fef01 | ||
|
|
ddf983d643 | ||
|
|
17b9db20f1 | ||
|
|
0dc559ed30 | ||
|
|
9566f5fdb0 | ||
|
|
4319769b79 | ||
|
|
e9d9a8eae3 | ||
|
|
cbb3ab80e7 | ||
|
|
cd9868b1b4 | ||
|
|
eb738148fe | ||
|
|
587e16fba3 | ||
|
|
4de7b9ae47 | ||
|
|
887aed634d | ||
|
|
6261342de3 | ||
|
|
1e566223ed | ||
|
|
113b48ca22 | ||
|
|
3e81c99b6b | ||
|
|
ec85c4a51d | ||
|
|
97de657d38 | ||
|
|
71966eba6c | ||
|
|
a359979e17 | ||
|
|
7a6a141bc4 | ||
|
|
b8ff6892f6 | ||
|
|
8fe7a9ce6f | ||
|
|
bc5fff7085 | ||
|
|
1943ea91a8 | ||
|
|
f5424fc9de | ||
|
|
0cf29ba6d2 | ||
|
|
50e18033e6 | ||
|
|
551b55d1c7 | ||
|
|
271ceb8bae | ||
|
|
5f846be2e4 | ||
|
|
fe7dcf98f3 | ||
|
|
2fb02626da | ||
|
|
a85c2785ae | ||
|
|
4806715c97 | ||
|
|
58c90d5937 | ||
|
|
2987bc7b40 | ||
|
|
695e0fa649 | ||
|
|
cbb23c46c2 | ||
|
|
0b4602b753 |
@@ -117,5 +117,15 @@ In chronological order:
|
||||
* Isaac Dunham <https://github.com/idunham>
|
||||
* [2014-08-03] Fixed link error on Linux/musl
|
||||
|
||||
* Dave Nuechterlein
|
||||
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
|
||||
ARMv8 support.
|
||||
|
||||
* Dan Kortschak
|
||||
* [2015-01-07] Added test for drotmg bug #484.
|
||||
|
||||
* Ton van den Heuvel <https://github.com/ton>
|
||||
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().
|
||||
|
||||
* [Your name or handle] <[email or website]>
|
||||
* [Date] [Brief summary of your changes]
|
||||
|
||||
@@ -1,4 +1,45 @@
|
||||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.2.14
|
||||
24-Mar-2015
|
||||
common:
|
||||
* Improve OpenBLASConfig.cmake. (#474, #475. Thanks, xantares.)
|
||||
* Improve ger and gemv for small matrices by stack allocation.
|
||||
e.g. make -DMAX_STACK_ALLOC=2048 (#482. Thanks, Jerome Robert.)
|
||||
* Introduce openblas_get_num_threads and openblas_get_num_procs.
|
||||
(#497. Thanks, Erik Schnetter.)
|
||||
* Add ATLAS-style ?geadd function. (#509. Thanks, Martin Köhler.)
|
||||
* Fix c/zsyr bug with negative incx. (#492.)
|
||||
* Fix race condition during shutdown causing a crash in
|
||||
gotoblas_set_affinity(). (#508. Thanks, Ton van den Heuvel.)
|
||||
|
||||
x86/x86-64:
|
||||
* Support AMD Streamroller.
|
||||
|
||||
ARM:
|
||||
* Add Cortex-A9 and Cortex-A15 targets.
|
||||
|
||||
====================================================================
|
||||
Version 0.2.13
|
||||
3-Dec-2014
|
||||
common:
|
||||
* Add SYMBOLPREFIX and SYMBOLSUFFIX makefile options
|
||||
for adding a prefix or suffix to all exported symbol names
|
||||
in the shared library.(#459, Thanks Tony Kelman)
|
||||
* Provide OpenBLASConfig.cmake at installation.
|
||||
* Fix Fortran compiler detection on FreeBSD.
|
||||
(#470, Thanks Mike Nolta)
|
||||
|
||||
|
||||
x86/x86-64:
|
||||
* Add generic kernel files for x86-64. make TARGET=GENERIC
|
||||
* Fix a bug of sgemm kernel on Intel Sandy Bridge.
|
||||
* Fix c_check bug on some amd64 systems. (#471, Thanks Mike Nolta)
|
||||
|
||||
ARM:
|
||||
* Support APM's X-Gene 1 AArch64 processors.
|
||||
Optimize trmm and sgemm. (#465, Thanks Dave Nuechterlein)
|
||||
|
||||
====================================================================
|
||||
Version 0.2.12
|
||||
13-Oct-2014
|
||||
|
||||
@@ -9,10 +9,10 @@
|
||||
|
||||
If you want to allocate 64 large pages,
|
||||
|
||||
$shell> echo 0 > /pros/sys/vm/nr_hugepages # need to be reset
|
||||
$shell> echo 65 > /pros/sys/vm/nr_hugepages # add 1 extra page
|
||||
$shell> echo 3355443200 > /pros/sys/kernel/shmmax # just large number
|
||||
$shell> echo 3355443200 > /pros/sys/kernel/shmall
|
||||
$shell> echo 0 > /proc/sys/vm/nr_hugepages # need to be reset
|
||||
$shell> echo 65 > /proc/sys/vm/nr_hugepages # add 1 extra page
|
||||
$shell> echo 3355443200 > /proc/sys/kernel/shmmax # just large number
|
||||
$shell> echo 3355443200 > /proc/sys/kernel/shmall
|
||||
|
||||
Also may add a few lines into /etc/security/limits.conf file.
|
||||
|
||||
|
||||
9
LICENSE
9
LICENSE
@@ -1,4 +1,4 @@
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -12,9 +12,10 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
|
||||
6
Makefile
6
Makefile
@@ -20,6 +20,8 @@ ifneq ($(NO_LAPACK), 1)
|
||||
SUBDIRS += lapack
|
||||
endif
|
||||
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
|
||||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench
|
||||
|
||||
.PHONY : all libs netlib test ctest shared install
|
||||
@@ -131,7 +133,7 @@ ifeq ($(CORE), UNKOWN)
|
||||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.)
|
||||
$(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.)
|
||||
endif
|
||||
ifeq ($(NO_STATIC), 1)
|
||||
ifeq ($(NO_SHARED), 1)
|
||||
@@ -231,7 +233,7 @@ ifndef NOFORTRAN
|
||||
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "NOOPT = $(LAPACK_FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
|
||||
19
Makefile.arm
19
Makefile.arm
@@ -1,8 +1,23 @@
|
||||
|
||||
ifeq ($(CORE), ARMV7)
|
||||
# ifeq logical or
|
||||
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
|
||||
ifeq ($(OSNAME), Android)
|
||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
else
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV7)
|
||||
ifeq ($(OSNAME), Android)
|
||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
||||
else
|
||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV6)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
|
||||
@@ -9,6 +9,8 @@ OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
|
||||
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
|
||||
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
|
||||
OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
|
||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
||||
|
||||
.PHONY : install
|
||||
.NOTPARALLEL : install
|
||||
@@ -21,6 +23,7 @@ install : lib.grd
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
#for inc
|
||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||
@@ -43,11 +46,11 @@ ifndef NO_CBLAS
|
||||
endif
|
||||
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||
@-install -pDm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
@@ -83,13 +86,31 @@ ifeq ($(OSNAME), Darwin)
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
@-cp $(LIBDLLNAME).a $(OPENBLAS_LIBRARY_DIR)
|
||||
@-cp $(LIBDLLNAME) $(DESTDIR)$(OPENBLAS_BINARY_DIR)
|
||||
@-cp $(LIBDLLNAME).a $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR)
|
||||
endif
|
||||
endif
|
||||
|
||||
#Generating OpenBLASConfig.cmake
|
||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
|
||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
ifndef NO_SHARED
|
||||
#ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
else
|
||||
#only static
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)
|
||||
endif
|
||||
@echo Install OK!
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.2.12
|
||||
VERSION = 0.2.14
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
@@ -114,6 +114,9 @@ NO_AFFINITY = 1
|
||||
# Support for IEEE quad precision(it's *real* REAL*16)( under testing)
|
||||
# QUAD_PRECISION = 1
|
||||
|
||||
# Support for integer matrix and vector (e.g. iaxpy)
|
||||
# INTEGER_PRECISION = 1
|
||||
|
||||
# Theads are still working for a while after finishing BLAS operation
|
||||
# to reduce thread activate/deactivate overhead. You can determine
|
||||
# time out to improve performance. This number should be from 4 to 30
|
||||
@@ -159,6 +162,19 @@ COMMON_PROF = -pg
|
||||
# Build Debug version
|
||||
# DEBUG = 1
|
||||
|
||||
# Improve GEMV and GER for small matrices by stack allocation.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/482
|
||||
#
|
||||
MAX_STACK_ALLOC=2048
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoid conflicts with other BLAS libraries, especially when using
|
||||
# 64 bit integer interfaces in OpenBLAS.
|
||||
# For details, https://github.com/xianyi/OpenBLAS/pull/459
|
||||
#
|
||||
# SYMBOLPREFIX=
|
||||
# SYMBOLSUFFIX=
|
||||
|
||||
#
|
||||
# End of user configuration
|
||||
#
|
||||
|
||||
@@ -23,6 +23,7 @@ CC = gcc
|
||||
UNAME_S := $(shell uname -s)
|
||||
ifeq ($(UNAME_S),Darwin)
|
||||
CC = clang
|
||||
# EXTRALIB += -Wl,-no_compact_unwind
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -61,6 +62,12 @@ endif
|
||||
ifeq ($(TARGET), PILEDRIVER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), STEAMROLLER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET), EXCAVATOR)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
@@ -85,6 +92,12 @@ endif
|
||||
ifeq ($(TARGET_CORE), PILEDRIVER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), STEAMROLLER)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), EXCAVATOR)
|
||||
GETARCH_FLAGS := -DFORCE_BARCELONA
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
@@ -186,13 +199,21 @@ LD = $(CROSS_SUFFIX)ld
|
||||
RANLIB = $(CROSS_SUFFIX)ranlib
|
||||
NM = $(CROSS_SUFFIX)nm
|
||||
DLLWRAP = $(CROSS_SUFFIX)dllwrap
|
||||
OBJCOPY = $(CROSS_SUFFIX)objcopy
|
||||
OBJCONV = $(CROSS_SUFFIX)objconv
|
||||
|
||||
|
||||
# For detect fortran failed, only build BLAS.
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
NO_LAPACK = 1
|
||||
endif
|
||||
|
||||
#
|
||||
# OS dependent settings
|
||||
#
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.2
|
||||
export MACOSX_DEPLOYMENT_TARGET=10.6
|
||||
MD5SUM = md5 -r
|
||||
endif
|
||||
|
||||
@@ -288,6 +309,10 @@ CCOMMON_OPT += -DQUAD_PRECISION
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifdef INTEGER_PRECISION
|
||||
CCOMMON_OPT += -DINTEGER_PRECISION
|
||||
endif
|
||||
|
||||
ifneq ($(ARCH), x86)
|
||||
ifneq ($(ARCH), x86_64)
|
||||
NO_EXPRECISION = 1
|
||||
@@ -303,6 +328,10 @@ ifdef SANITY_CHECK
|
||||
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
|
||||
endif
|
||||
|
||||
ifdef MAX_STACK_ALLOC
|
||||
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
|
||||
endif
|
||||
|
||||
#
|
||||
# Architecture dependent settings
|
||||
#
|
||||
@@ -352,6 +381,12 @@ endif
|
||||
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
|
||||
#check
|
||||
ifeq ($(USE_THREAD), 0)
|
||||
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
|
||||
endif
|
||||
|
||||
# ifeq logical or. GCC or LSB
|
||||
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC LSB))
|
||||
CCOMMON_OPT += -fopenmp
|
||||
@@ -390,7 +425,7 @@ endif
|
||||
ifeq ($(ARCH), x86_64)
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
|
||||
endif
|
||||
ifneq ($(NO_AVX2), 1)
|
||||
DYNAMIC_CORE += HASWELL
|
||||
@@ -560,7 +595,7 @@ else
|
||||
FCOMMON_OPT += -m32
|
||||
endif
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -fopenmp
|
||||
endif
|
||||
endif
|
||||
@@ -572,14 +607,14 @@ ifneq ($(INTERFACE64), 0)
|
||||
FCOMMON_OPT += -i8
|
||||
endif
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), FUJITSU)
|
||||
CCOMMON_OPT += -DF_INTERFACE_FUJITSU
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
@@ -597,7 +632,7 @@ endif
|
||||
else
|
||||
FCOMMON_OPT += -q32
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
@@ -615,7 +650,7 @@ FCOMMON_OPT += -tp p7-64
|
||||
else
|
||||
FCOMMON_OPT += -tp p7
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
endif
|
||||
@@ -644,7 +679,7 @@ FCOMMON_OPT += -mabi=n32
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
endif
|
||||
@@ -681,7 +716,7 @@ FCOMMON_OPT += -m64
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FEXTRALIB += -lstdc++
|
||||
FCOMMON_OPT += -mp
|
||||
endif
|
||||
@@ -729,14 +764,14 @@ FCOMMON_OPT += -m32
|
||||
else
|
||||
FCOMMON_OPT += -m64
|
||||
endif
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -xopenmp=parallel
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), COMPAQ)
|
||||
CCOMMON_OPT += -DF_INTERFACE_COMPAQ
|
||||
ifdef USE_OPENMP
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
FCOMMON_OPT += -openmp
|
||||
endif
|
||||
endif
|
||||
@@ -845,6 +880,14 @@ else
|
||||
LIBPREFIX = libopenblas_$(LIBNAMESUFFIX)
|
||||
endif
|
||||
|
||||
ifndef SYMBOLPREFIX
|
||||
SYMBOLPREFIX =
|
||||
endif
|
||||
|
||||
ifndef SYMBOLSUFFIX
|
||||
SYMBOLSUFFIX =
|
||||
endif
|
||||
|
||||
KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
|
||||
|
||||
include $(TOPDIR)/Makefile.$(ARCH)
|
||||
|
||||
@@ -4,6 +4,7 @@ QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
IBLASOBJS_P = $(IBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
@@ -22,12 +23,18 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
||||
endif
|
||||
|
||||
ifdef INTEGER_PRECISION
|
||||
BLASOBJS += $(IBLASOBJS)
|
||||
BLASOBJS_P += $(IBLASOBJS_P)
|
||||
endif
|
||||
|
||||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
||||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
||||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
|
||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
||||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||
$(IBLASOBJS) $(IBLASOBJS_P) : override CFLAGS += -DINTEGER -UCOMPLEX
|
||||
|
||||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
@@ -35,6 +42,7 @@ $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(IBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
|
||||
libs :: $(BLASOBJS) $(COMMONOBJS)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
|
||||
@@ -60,6 +60,7 @@ Please read GotoBLAS_01Readme.txt
|
||||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
|
||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thank Werner Saar)
|
||||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
|
||||
|
||||
#### MIPS64:
|
||||
- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.
|
||||
|
||||
@@ -32,6 +32,8 @@ ISTANBUL
|
||||
BOBCAT
|
||||
BULLDOZER
|
||||
PILEDRIVER
|
||||
STEAMROLLER
|
||||
EXCAVATOR
|
||||
|
||||
c)VIA CPU:
|
||||
SSE_GENERIC
|
||||
@@ -62,6 +64,11 @@ SPARC
|
||||
SPARCV7
|
||||
|
||||
6.ARM CPU:
|
||||
CORTEXA15
|
||||
CORTEXA9
|
||||
ARMV7
|
||||
ARMV6
|
||||
ARMV5
|
||||
|
||||
7.ARM 64-bit CPU:
|
||||
ARMV8
|
||||
|
||||
9
benchmark/Make_exe.sh
Executable file
9
benchmark/Make_exe.sh
Executable file
@@ -0,0 +1,9 @@
|
||||
#!/bin/bash
|
||||
|
||||
for f in *.goto *.acml *.mkl *.atlas
|
||||
do
|
||||
if [ -f "$f" ]; then
|
||||
mv $f `echo $f|tr '.' '_'`.exe
|
||||
fi
|
||||
done
|
||||
|
||||
1021
benchmark/Makefile
1021
benchmark/Makefile
File diff suppressed because it is too large
Load Diff
196
benchmark/asum.c
Normal file
196
benchmark/asum.c
Normal file
@@ -0,0 +1,196 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef ASUM
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define ASUM BLASFUNC(dzasum)
|
||||
#else
|
||||
#define ASUM BLASFUNC(scasum)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define ASUM BLASFUNC(dasum)
|
||||
#else
|
||||
#define ASUM BLASFUNC(sasum)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
FLOAT result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = ASUM (&m, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops\n", 4. * (double)m / timeg * 1.e-6);
|
||||
#else
|
||||
fprintf(stderr, " %10.2f MFlops\n", 2. * (double)m / timeg * 1.e-6);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -114,7 +114,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
@@ -198,4 +198,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -71,8 +71,14 @@ double fabs(double);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
@@ -99,6 +105,7 @@ int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
static __inline double getmflops(int ratio, int m, double secs){
|
||||
|
||||
double mm = (double)m;
|
||||
@@ -117,7 +124,7 @@ static __inline double getmflops(int ratio, int m, double secs){
|
||||
}
|
||||
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
#ifndef COMPLEX
|
||||
char *trans[] = {"T", "N"};
|
||||
@@ -273,4 +280,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
201
benchmark/copy.c
Normal file
201
benchmark/copy.c
Normal file
@@ -0,0 +1,201 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef COPY
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define COPY BLASFUNC(zcopy)
|
||||
#else
|
||||
#define COPY BLASFUNC(ccopy)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define COPY BLASFUNC(dcopy)
|
||||
#else
|
||||
#define COPY BLASFUNC(scopy)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
COPY (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT result;
|
||||
@@ -192,4 +192,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -139,11 +139,12 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
|
||||
FLOAT wkopt[4];
|
||||
char job='V';
|
||||
char jobr='N';
|
||||
char *p;
|
||||
|
||||
blasint m, i, j, info,lwork;
|
||||
@@ -202,9 +203,9 @@ int MAIN__(int argc, char *argv[]){
|
||||
lwork = -1;
|
||||
m=to;
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
@@ -226,16 +227,16 @@ int MAIN__(int argc, char *argv[]){
|
||||
|
||||
lwork = -1;
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
#ifndef COMPLEX
|
||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
|
||||
#else
|
||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
@@ -257,4 +258,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,14 +118,15 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
FLOAT beta [] = {1.0, 1.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
char trans='N';
|
||||
blasint m, i, j;
|
||||
blasint m, n, i, j;
|
||||
int loops = 1;
|
||||
int has_param_n=0;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
@@ -162,51 +163,61 @@ int MAIN__(int argc, char *argv[]){
|
||||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
if ((p = getenv("OPENBLAS_PARAM_N"))) {
|
||||
n = atoi(p);
|
||||
has_param_n=1;
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < to * COMPSIZE; i++){
|
||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
if ( has_param_n == 1 && n <= m )
|
||||
n=n;
|
||||
else
|
||||
n=m;
|
||||
|
||||
|
||||
|
||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)n);
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg /= loops;
|
||||
timeg = time1/loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6);
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -209,4 +209,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -266,4 +266,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -35,12 +35,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#undef GER
|
||||
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define GER BLASFUNC(zgeru)
|
||||
#else
|
||||
#define GER BLASFUNC(cgeru)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define GER BLASFUNC(dger)
|
||||
#else
|
||||
#define GER BLASFUNC(sger)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
@@ -108,7 +115,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -214,5 +221,5 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
|
||||
218
benchmark/gesv.c
Normal file
218
benchmark/gesv.c
Normal file
@@ -0,0 +1,218 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
#undef GESV
|
||||
#undef GETRS
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
#define GESV BLASFUNC(qgesv)
|
||||
#elif defined(DOUBLE)
|
||||
#define GESV BLASFUNC(dgesv)
|
||||
#else
|
||||
#define GESV BLASFUNC(sgesv)
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
#define GESV BLASFUNC(xgesv)
|
||||
#elif defined(DOUBLE)
|
||||
#define GESV BLASFUNC(zgesv)
|
||||
#else
|
||||
#define GESV BLASFUNC(cgesv)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
blasint *ipiv;
|
||||
|
||||
blasint m, i, j, info;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
|
||||
fprintf(stderr, " %dx%d : ", (int)m, (int)m);
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
b[i + j * m * COMPSIZE] = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (j = 0; j < m; ++j) {
|
||||
for (i = 0; i < m * COMPSIZE; ++i) {
|
||||
b[i] += a[i + j * m * COMPSIZE];
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GESV (&m, &m, a, &m, ipiv, b, &m, &info);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
|
||||
|
||||
fprintf(stderr,
|
||||
"%10.2f MFlops %10.6f s\n",
|
||||
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*work;
|
||||
FLOAT wkopt[4];
|
||||
@@ -231,4 +231,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -107,7 +107,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -189,4 +189,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -205,4 +205,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -106,7 +106,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -188,4 +188,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -108,7 +108,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -186,4 +186,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -137,7 +137,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
blasint *ipiv;
|
||||
@@ -270,4 +270,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -52,6 +52,11 @@ C)
|
||||
awk '/MFlops/ { print $3,int($9) }'|tail --lines=+2
|
||||
;;
|
||||
|
||||
B)
|
||||
# Copy Benchmark
|
||||
awk '/MBytes/ { print $1,int($3) }'|tail --lines=+2
|
||||
;;
|
||||
|
||||
|
||||
*)
|
||||
awk '/MFlops/ { print $1,int($3) }'|tail --lines=+2
|
||||
|
||||
@@ -88,6 +88,10 @@ double fabs(double);
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
@@ -114,7 +118,7 @@ int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
#ifndef COMPLEX
|
||||
char *trans[] = {"T", "N"};
|
||||
@@ -278,5 +282,5 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
|
||||
202
benchmark/scal.c
Normal file
202
benchmark/scal.c
Normal file
@@ -0,0 +1,202 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SCAL
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define SCAL BLASFUNC(zscal)
|
||||
#else
|
||||
#define SCAL BLASFUNC(cscal)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define SCAL BLASFUNC(dscal)
|
||||
#else
|
||||
#define SCAL BLASFUNC(sscal)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SCAL (&m, alpha, x, &inc_x);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops\n", 6. * (double)m / timeg * 1.e-6);
|
||||
#else
|
||||
fprintf(stderr, " %10.2f MFlops\n", 1. * (double)m / timeg * 1.e-6);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
56
benchmark/scripts/NUMPY/cgemm.py
Executable file
56
benchmark/scripts/NUMPY/cgemm.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_cgemm(N,l):
|
||||
|
||||
A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j;
|
||||
B = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j;
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 8*N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_cgemm(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/cgemv.py
Executable file
56
benchmark/scripts/NUMPY/cgemv.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_cgemv(N,l):
|
||||
|
||||
A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j;
|
||||
B = randn(N).astype('float32') + randn(N).astype('float32') * 1j;
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 8*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_cgemv(i,LOOPS)
|
||||
|
||||
58
benchmark/scripts/NUMPY/daxpy.py
Executable file
58
benchmark/scripts/NUMPY/daxpy.py
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
from scipy.linalg.blas import daxpy
|
||||
|
||||
|
||||
def run_daxpy(N,l):
|
||||
|
||||
x = randn(N).astype('float64')
|
||||
y = randn(N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
y = daxpy(x,y, a=2.0 )
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N ) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%d" % (N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_daxpy(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/ddot.py
Executable file
56
benchmark/scripts/NUMPY/ddot.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_ddot(N,l):
|
||||
|
||||
A = randn(N).astype('float64')
|
||||
B = randn(N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N ) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%d" % (N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_ddot(i,LOOPS)
|
||||
|
||||
55
benchmark/scripts/NUMPY/deig.py
Executable file
55
benchmark/scripts/NUMPY/deig.py
Executable file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_deig(N,l):
|
||||
|
||||
A = randn(N,N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
la,v = numpy.linalg.eig(A)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 26.33 *N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_deig(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/dgemm.py
Executable file
56
benchmark/scripts/NUMPY/dgemm.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_dgemm(N,l):
|
||||
|
||||
A = randn(N,N).astype('float64')
|
||||
B = randn(N,N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_dgemm(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/dgemv.py
Executable file
56
benchmark/scripts/NUMPY/dgemv.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_dgemv(N,l):
|
||||
|
||||
A = randn(N,N).astype('float64')
|
||||
B = randn(N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_dgemv(i,LOOPS)
|
||||
|
||||
58
benchmark/scripts/NUMPY/dgesv.py
Executable file
58
benchmark/scripts/NUMPY/dgesv.py
Executable file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
from scipy.linalg.lapack import dgesv
|
||||
|
||||
def run_dgesv(N,l):
|
||||
|
||||
a = randn(N,N).astype('float64')
|
||||
b = randn(N,N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
dgesv(a,b,1,1)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
|
||||
mflops = ( 2.0/3.0 *N*N*N + 2.0*N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_dgesv(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/dsolve.py
Executable file
56
benchmark/scripts/NUMPY/dsolve.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_dsolve(N,l):
|
||||
|
||||
A = randn(N,N).astype('float64')
|
||||
B = randn(N,N).astype('float64')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.linalg.solve(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2.0/3.0 *N*N*N + 2.0*N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_dsolve(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/sdot.py
Executable file
56
benchmark/scripts/NUMPY/sdot.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_sdot(N,l):
|
||||
|
||||
A = randn(N).astype('float32')
|
||||
B = randn(N).astype('float32')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N ) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%d" % (N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_sdot(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/sgemm.py
Executable file
56
benchmark/scripts/NUMPY/sgemm.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_sgemm(N,l):
|
||||
|
||||
A = randn(N,N).astype('float32')
|
||||
B = randn(N,N).astype('float32')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_sgemm(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/sgemv.py
Executable file
56
benchmark/scripts/NUMPY/sgemv.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_sgemv(N,l):
|
||||
|
||||
A = randn(N,N).astype('float32')
|
||||
B = randn(N).astype('float32')
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 2*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_sgemv(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/zgemm.py
Executable file
56
benchmark/scripts/NUMPY/zgemm.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_zgemm(N,l):
|
||||
|
||||
A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j;
|
||||
B = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j;
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 8*N*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_zgemm(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/NUMPY/zgemv.py
Executable file
56
benchmark/scripts/NUMPY/zgemv.py
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import numpy
|
||||
from numpy.random import randn
|
||||
|
||||
def run_zgemv(N,l):
|
||||
|
||||
A = randn(N,N).astype('float64') + randn(N,N).astype('float64') * 1j;
|
||||
B = randn(N).astype('float64') + randn(N).astype('float64') * 1j;
|
||||
|
||||
start = time.time();
|
||||
for i in range(0,l):
|
||||
ref = numpy.dot(A,B)
|
||||
end = time.time()
|
||||
|
||||
timediff = (end -start)
|
||||
mflops = ( 8*N*N) *l / timediff
|
||||
mflops *= 1e-6
|
||||
|
||||
size = "%dx%d" % (N,N)
|
||||
print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
N=128
|
||||
NMAX=2048
|
||||
NINC=128
|
||||
LOOPS=1
|
||||
|
||||
z=0
|
||||
for arg in sys.argv:
|
||||
if z == 1:
|
||||
N = int(arg)
|
||||
elif z == 2:
|
||||
NMAX = int(arg)
|
||||
elif z == 3:
|
||||
NINC = int(arg)
|
||||
elif z == 4:
|
||||
LOOPS = int(arg)
|
||||
|
||||
z = z + 1
|
||||
|
||||
if 'OPENBLAS_LOOPS' in os.environ:
|
||||
p = os.environ['OPENBLAS_LOOPS']
|
||||
if p:
|
||||
LOOPS = int(p);
|
||||
|
||||
print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
|
||||
print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")
|
||||
|
||||
for i in range (N,NMAX+NINC,NINC):
|
||||
run_zgemv(i,LOOPS)
|
||||
|
||||
56
benchmark/scripts/OCTAVE/cgemm.m
Executable file
56
benchmark/scripts/OCTAVE/cgemm.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = single(rand(n,n)) + single(rand(n,n)) * 1i;
|
||||
B = single(rand(n,n)) + single(rand(n,n)) * 1i;
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
56
benchmark/scripts/OCTAVE/cgemv.m
Executable file
56
benchmark/scripts/OCTAVE/cgemv.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = single(rand(n,n)) + single(rand(n,n)) * 1i;
|
||||
B = single(rand(n,1)) + single(rand(n,1)) * 1i;
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 4.0 * 2.0*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
56
benchmark/scripts/OCTAVE/deig.m
Executable file
56
benchmark/scripts/OCTAVE/deig.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = double(rand(n,n));
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
[V,lambda] = eig(A);
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 26.33 *n*n*n ) *loops / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg );
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
56
benchmark/scripts/OCTAVE/dgemm.m
Executable file
56
benchmark/scripts/OCTAVE/dgemm.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = double(rand(n,n));
|
||||
B = double(rand(n,n));
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
56
benchmark/scripts/OCTAVE/dgemv.m
Executable file
56
benchmark/scripts/OCTAVE/dgemv.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = double(rand(n,n));
|
||||
B = double(rand(n,1));
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 2.0*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
59
benchmark/scripts/OCTAVE/dsolve.m
Executable file
59
benchmark/scripts/OCTAVE/dsolve.m
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = double(rand(n,n));
|
||||
B = double(rand(n,n));
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
x = linsolve(A,B);
|
||||
#x = A / B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
#r = norm(A*x - B)/norm(B)
|
||||
mflops = ( 2.0/3.0 *n*n*n + 2.0*n*n*n ) *loops / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg );
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
56
benchmark/scripts/OCTAVE/sgemm.m
Executable file
56
benchmark/scripts/OCTAVE/sgemm.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = single(rand(n,n));
|
||||
B = single(rand(n,n));
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
56
benchmark/scripts/OCTAVE/sgemv.m
Executable file
56
benchmark/scripts/OCTAVE/sgemv.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = single(rand(n,n));
|
||||
B = single(rand(n,1));
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 2.0*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
56
benchmark/scripts/OCTAVE/zgemm.m
Executable file
56
benchmark/scripts/OCTAVE/zgemm.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = double(rand(n,n)) + double(rand(n,n)) * 1i;
|
||||
B = double(rand(n,n)) + double(rand(n,n)) * 1i;
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
56
benchmark/scripts/OCTAVE/zgemv.m
Executable file
56
benchmark/scripts/OCTAVE/zgemv.m
Executable file
@@ -0,0 +1,56 @@
|
||||
#!/usr/bin/octave --silent
|
||||
|
||||
nfrom = 128 ;
|
||||
nto = 2048;
|
||||
nstep = 128;
|
||||
loops = 1;
|
||||
|
||||
|
||||
arg_list = argv();
|
||||
for i = 1:nargin
|
||||
|
||||
switch(i)
|
||||
case 1
|
||||
nfrom = str2num(arg_list{i});
|
||||
case 2
|
||||
nto = str2num(arg_list{i});
|
||||
case 3
|
||||
nstep = str2num(arg_list{i});
|
||||
case 4
|
||||
loops = str2num(arg_list{i});
|
||||
|
||||
endswitch
|
||||
|
||||
endfor
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if p
|
||||
loops = str2num(p);
|
||||
endif
|
||||
|
||||
printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
|
||||
printf(" SIZE FLOPS TIME\n");
|
||||
|
||||
n = nfrom;
|
||||
while n <= nto
|
||||
|
||||
A = double(rand(n,n)) + double(rand(n,n)) * 1i;
|
||||
B = double(rand(n,1)) + double(rand(n,1)) * 1i;
|
||||
start = clock();
|
||||
|
||||
l=0;
|
||||
while l < loops
|
||||
|
||||
C = A * B;
|
||||
l = l + 1;
|
||||
|
||||
endwhile
|
||||
|
||||
timeg = etime(clock(), start);
|
||||
mflops = ( 4.0 * 2.0*n*n *loops ) / ( timeg * 1.0e6 );
|
||||
|
||||
st1 = sprintf("%dx%d : ", n,n);
|
||||
printf("%20s %10.2f MFlops %10.6f sec\n", st1, mflops, timeg);
|
||||
n = n + nstep;
|
||||
|
||||
endwhile
|
||||
62
benchmark/scripts/R/deig.R
Executable file
62
benchmark/scripts/R/deig.R
Executable file
@@ -0,0 +1,62 @@
|
||||
#!/usr/bin/Rscript
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
nfrom = 128
|
||||
nto = 2048
|
||||
nstep = 128
|
||||
loops = 1
|
||||
|
||||
if ( length(argv) > 0 ) {
|
||||
|
||||
for ( z in 1:length(argv) ) {
|
||||
|
||||
if ( z == 1 ) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if ( z==2 ) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if ( z==3 ) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if ( z==4 ) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
||||
if ( p != "" ) {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n = nfrom
|
||||
while ( n <= nto ) {
|
||||
|
||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
|
||||
l = 1
|
||||
|
||||
start <- proc.time()[3]
|
||||
|
||||
while ( l <= loops ) {
|
||||
|
||||
ev <- eigen(A)
|
||||
l = l + 1
|
||||
}
|
||||
|
||||
end <- proc.time()[3]
|
||||
timeg = end - start
|
||||
mflops = (26.66 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
||||
|
||||
st = sprintf("%.0fx%.0f :",n , n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
||||
|
||||
n = n + nstep
|
||||
|
||||
}
|
||||
|
||||
|
||||
63
benchmark/scripts/R/dgemm.R
Executable file
63
benchmark/scripts/R/dgemm.R
Executable file
@@ -0,0 +1,63 @@
|
||||
#!/usr/bin/Rscript
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
nfrom = 128
|
||||
nto = 2048
|
||||
nstep = 128
|
||||
loops = 1
|
||||
|
||||
if ( length(argv) > 0 ) {
|
||||
|
||||
for ( z in 1:length(argv) ) {
|
||||
|
||||
if ( z == 1 ) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if ( z==2 ) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if ( z==3 ) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if ( z==4 ) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
||||
if ( p != "" ) {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n = nfrom
|
||||
while ( n <= nto ) {
|
||||
|
||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
|
||||
l = 1
|
||||
|
||||
start <- proc.time()[3]
|
||||
|
||||
while ( l <= loops ) {
|
||||
|
||||
C <- A %*% B
|
||||
l = l + 1
|
||||
}
|
||||
|
||||
end <- proc.time()[3]
|
||||
timeg = end - start
|
||||
mflops = ( 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
||||
|
||||
st = sprintf("%.0fx%.0f :",n , n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
||||
|
||||
n = n + nstep
|
||||
|
||||
}
|
||||
|
||||
|
||||
63
benchmark/scripts/R/dsolve.R
Executable file
63
benchmark/scripts/R/dsolve.R
Executable file
@@ -0,0 +1,63 @@
|
||||
#!/usr/bin/Rscript
|
||||
|
||||
argv <- commandArgs(trailingOnly = TRUE)
|
||||
|
||||
nfrom = 128
|
||||
nto = 2048
|
||||
nstep = 128
|
||||
loops = 1
|
||||
|
||||
if ( length(argv) > 0 ) {
|
||||
|
||||
for ( z in 1:length(argv) ) {
|
||||
|
||||
if ( z == 1 ) {
|
||||
nfrom <- as.numeric(argv[z])
|
||||
} else if ( z==2 ) {
|
||||
nto <- as.numeric(argv[z])
|
||||
} else if ( z==3 ) {
|
||||
nstep <- as.numeric(argv[z])
|
||||
} else if ( z==4 ) {
|
||||
loops <- as.numeric(argv[z])
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
p=Sys.getenv("OPENBLAS_LOOPS")
|
||||
if ( p != "" ) {
|
||||
loops <- as.numeric(p)
|
||||
}
|
||||
|
||||
|
||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
|
||||
cat(sprintf(" SIZE Flops Time\n"))
|
||||
|
||||
n = nfrom
|
||||
while ( n <= nto ) {
|
||||
|
||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
|
||||
|
||||
l = 1
|
||||
|
||||
start <- proc.time()[3]
|
||||
|
||||
while ( l <= loops ) {
|
||||
|
||||
solve(A,B)
|
||||
l = l + 1
|
||||
}
|
||||
|
||||
end <- proc.time()[3]
|
||||
timeg = end - start
|
||||
mflops = (2.0/3.0 *n*n*n + 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )
|
||||
|
||||
st = sprintf("%.0fx%.0f :",n , n)
|
||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))
|
||||
|
||||
n = n + nstep
|
||||
|
||||
}
|
||||
|
||||
|
||||
201
benchmark/swap.c
Normal file
201
benchmark/swap.c
Normal file
@@ -0,0 +1,201 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above swapright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above swapright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE SWAPRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SWAP
|
||||
|
||||
#ifdef COMPLEX
|
||||
#ifdef DOUBLE
|
||||
#define SWAP BLASFUNC(zswap)
|
||||
#else
|
||||
#define SWAP BLASFUNC(cswap)
|
||||
#endif
|
||||
#else
|
||||
#ifdef DOUBLE
|
||||
#define SWAP BLASFUNC(dswap)
|
||||
#else
|
||||
#define SWAP BLASFUNC(sswap)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SWAP (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MBytes\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -200,4 +200,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -215,4 +215,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -200,4 +200,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *c;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -196,4 +196,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -199,4 +199,4 @@ int MAIN__(int argc, char *argv[]){
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
@@ -118,7 +118,7 @@ static void *huge_malloc(BLASLONG size){
|
||||
|
||||
#endif
|
||||
|
||||
int MAIN__(int argc, char *argv[]){
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
FLOAT alpha[] = {1.0, 1.0};
|
||||
@@ -130,11 +130,21 @@ int MAIN__(int argc, char *argv[]){
|
||||
char trans='N';
|
||||
char diag ='U';
|
||||
|
||||
|
||||
int l;
|
||||
int loops = 1;
|
||||
double timeg;
|
||||
|
||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
|
||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if ( p != NULL )
|
||||
loops = atoi(p);
|
||||
|
||||
|
||||
blasint m, i, j;
|
||||
|
||||
int from = 1;
|
||||
@@ -150,7 +160,7 @@ int MAIN__(int argc, char *argv[]){
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag);
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c Loops = %d\n", from, to, step,side,uplo,trans,diag,loops);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
@@ -171,32 +181,39 @@ int MAIN__(int argc, char *argv[]){
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
timeg=0.0;
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
time1 = timeg/loops;
|
||||
|
||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
|
||||
196
benchmark/zdot-intel.c
Normal file
196
benchmark/zdot-intel.c
Normal file
@@ -0,0 +1,196 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#define RETURN_BY_STACK 1
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef DOT
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define DOT BLASFUNC(zdotu)
|
||||
#else
|
||||
#define DOT BLASFUNC(cdotu)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT _Complex result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
DOT (&result, &m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
195
benchmark/zdot.c
Normal file
195
benchmark/zdot.c
Normal file
@@ -0,0 +1,195 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef DOT
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define DOT BLASFUNC(zdotu)
|
||||
#else
|
||||
#define DOT BLASFUNC(cdotu)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
FLOAT _Complex result;
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
int loops = 1;
|
||||
int l;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);
|
||||
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Flops\n");
|
||||
|
||||
for(m = from; m <= to; m += step)
|
||||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = DOT (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));
|
||||
8
c_check
8
c_check
@@ -3,6 +3,9 @@
|
||||
# Checking cross compile
|
||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
||||
$hostarch = "x86_64" if ($hostarch eq "amd64");
|
||||
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
|
||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
||||
|
||||
$binary = $ENV{"BINARY"};
|
||||
|
||||
@@ -54,6 +57,7 @@ $os = osf if ($data =~ /OS_OSF/);
|
||||
$os = WINNT if ($data =~ /OS_WINNT/);
|
||||
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/);
|
||||
$os = Interix if ($data =~ /OS_INTERIX/);
|
||||
$os = Android if ($data =~ /OS_ANDROID/);
|
||||
|
||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
||||
@@ -80,6 +84,10 @@ if (($architecture eq "mips32") || ($architecture eq "mips64")) {
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if (($architecture eq "arm") || ($architecture eq "arm64")) {
|
||||
$defined = 1;
|
||||
}
|
||||
|
||||
if ($architecture eq "alpha") {
|
||||
$defined = 1;
|
||||
$binary = 64;
|
||||
|
||||
16
cblas.h
16
cblas.h
@@ -13,6 +13,12 @@ extern "C" {
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the number of threads on runtime.*/
|
||||
int openblas_get_num_threads(void);
|
||||
|
||||
/*Get the number of physical processors (cores).*/
|
||||
int openblas_get_num_procs(void);
|
||||
|
||||
/*Get the build configure on runtime.*/
|
||||
char* openblas_get_config(void);
|
||||
|
||||
@@ -341,6 +347,16 @@ void cblas_cimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum
|
||||
void cblas_zimatcopy(OPENBLAS_CONST enum CBLAS_ORDER CORDER, OPENBLAS_CONST enum CBLAS_TRANSPOSE CTRANS, OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double* calpha, double* a,
|
||||
OPENBLAS_CONST blasint clda, OPENBLAS_CONST blasint cldb);
|
||||
|
||||
void cblas_sgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_dgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_cgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST float *calpha, float *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST float *cbeta,
|
||||
float *c, OPENBLAS_CONST blasint cldc);
|
||||
void cblas_zgeadd(OPENBLAS_CONST enum CBLAS_ORDER CORDER,OPENBLAS_CONST blasint crows, OPENBLAS_CONST blasint ccols, OPENBLAS_CONST double *calpha, double *a, OPENBLAS_CONST blasint clda, OPENBLAS_CONST double *cbeta,
|
||||
double *c, OPENBLAS_CONST blasint cldc);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
@@ -13,6 +13,12 @@ extern "C" {
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
void goto_set_num_threads(int num_threads);
|
||||
|
||||
/*Get the number of threads on runtime.*/
|
||||
int openblas_get_num_threads(void);
|
||||
|
||||
/*Get the number of physical processors (cores).*/
|
||||
int openblas_get_num_procs(void);
|
||||
|
||||
/*Get the build configure on runtime.*/
|
||||
char* openblas_get_config(void);
|
||||
|
||||
@@ -327,6 +333,16 @@ void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, bl
|
||||
blasint clda, blasint cldb);
|
||||
void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a,
|
||||
blasint clda, blasint cldb);
|
||||
|
||||
void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta,
|
||||
double *c, blasint cldc);
|
||||
void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta,
|
||||
float *c, blasint cldc);
|
||||
void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta,
|
||||
double *c, blasint cldc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif /* __cplusplus */
|
||||
|
||||
21
common.h
21
common.h
@@ -93,6 +93,10 @@ extern "C" {
|
||||
#include <sched.h>
|
||||
#endif
|
||||
|
||||
#ifdef OS_ANDROID
|
||||
#define NO_SYSV_IPC
|
||||
#endif
|
||||
|
||||
#ifdef OS_WINDOWS
|
||||
#ifdef ATOM
|
||||
#define GOTO_ATOM ATOM
|
||||
@@ -106,7 +110,9 @@ extern "C" {
|
||||
#endif
|
||||
#else
|
||||
#include <sys/mman.h>
|
||||
#ifndef NO_SYSV_IPC
|
||||
#include <sys/shm.h>
|
||||
#endif
|
||||
#include <sys/time.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
@@ -270,6 +276,11 @@ typedef int blasint;
|
||||
#define SIZE 8
|
||||
#define BASE_SHIFT 3
|
||||
#define ZBASE_SHIFT 4
|
||||
#elif defined(INTEGER) //extend for integer matrix
|
||||
#define FLOAT int
|
||||
#define SIZE 4
|
||||
#define BASE_SHIFT 2
|
||||
#define ZBASE_SHIFT 3
|
||||
#else
|
||||
#define FLOAT float
|
||||
#define SIZE 4
|
||||
@@ -327,6 +338,14 @@ typedef int blasint;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/*
|
||||
#ifdef STEAMROLLER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
*/
|
||||
|
||||
#ifndef YIELDING
|
||||
#define YIELDING sched_yield()
|
||||
#endif
|
||||
@@ -491,6 +510,8 @@ void blas_set_parameter(void);
|
||||
int blas_get_cpu_number(void);
|
||||
void *blas_memory_alloc (int);
|
||||
void blas_memory_free (void *);
|
||||
void *blas_memory_alloc_nolock (int); //use malloc without blas_lock
|
||||
void blas_memory_free_nolock (void *);
|
||||
|
||||
int get_num_procs (void);
|
||||
|
||||
|
||||
60
common_arm.h
60
common_arm.h
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011-2015, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -13,9 +13,10 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
@@ -27,53 +28,23 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#ifndef COMMON_ARM
|
||||
#define COMMON_ARM
|
||||
|
||||
#if defined(ARMV5) || defined(ARMV6)
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
|
||||
#else
|
||||
|
||||
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
|
||||
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
|
||||
|
||||
#endif
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
#define RETURN_BY_COMPLEX
|
||||
@@ -88,9 +59,12 @@ static void __inline blas_lock(volatile BLASULONG *address){
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"1: \n\t"
|
||||
"ldrex r2, [%1] \n\t"
|
||||
"mov r2, #0 \n\t"
|
||||
"strex r3, r2, [%1] \n\t"
|
||||
"cmp r3, #0 \n\t"
|
||||
"bne 1b \n\t"
|
||||
"mov %0 , r3 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011-2015, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -13,9 +13,10 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
@@ -27,52 +28,14 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
/* without modification, are permitted provided that the following */
|
||||
/* conditions are met: */
|
||||
/* */
|
||||
/* 1. Redistributions of source code must retain the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer. */
|
||||
/* */
|
||||
/* 2. Redistributions in binary form must reproduce the above */
|
||||
/* copyright notice, this list of conditions and the following */
|
||||
/* disclaimer in the documentation and/or other materials */
|
||||
/* provided with the distribution. */
|
||||
/* */
|
||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
||||
/* */
|
||||
/* The views and conclusions contained in the software and */
|
||||
/* documentation are those of the authors and should not be */
|
||||
/* interpreted as representing official policies, either expressed */
|
||||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#ifndef COMMON_ARM64
|
||||
#define COMMON_ARM64
|
||||
|
||||
#define MB
|
||||
#define WMB
|
||||
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory")
|
||||
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory")
|
||||
|
||||
|
||||
#define INLINE inline
|
||||
|
||||
@@ -81,26 +44,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
/*
|
||||
|
||||
int register ret;
|
||||
int register tmp;
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"ldrex r2, [%1] \n\t"
|
||||
"mov r2, #0 \n\t"
|
||||
"strex r3, r2, [%1] \n\t"
|
||||
"mov %0 , r3 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
"1: \n\t"
|
||||
"ldaxr %2, [%1] \n\t"
|
||||
"mov %2, #0 \n\t"
|
||||
"stlxr %w0, %2, [%1] \n\t"
|
||||
"cbnz %w0, 1b \n\t"
|
||||
"mov %0 , #0 \n\t"
|
||||
: "=r"(ret), "=r"(address), "=r"(tmp)
|
||||
: "1"(address)
|
||||
: "memory", "r2" , "r3"
|
||||
: "memory", "%w0"
|
||||
//, "%r2" , "%r3"
|
||||
|
||||
|
||||
);
|
||||
|
||||
} while (ret);
|
||||
*/
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -119,9 +86,9 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
}
|
||||
|
||||
#if defined(DOUBLE)
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
|
||||
#else
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
|
||||
#define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
|
||||
#endif
|
||||
|
||||
#define GET_IMAGE_CANCEL
|
||||
@@ -138,7 +105,6 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||
|
||||
#define PROLOGUE \
|
||||
.arm ;\
|
||||
.global REALNAME ;\
|
||||
.func REALNAME ;\
|
||||
REALNAME:
|
||||
@@ -167,3 +133,4 @@ REALNAME:
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -220,6 +220,7 @@
|
||||
#define COMATCOPY_K_CTC comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC comatcopy_k_rtc
|
||||
|
||||
#define CGEADD_K cgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
@@ -402,6 +403,7 @@
|
||||
#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc
|
||||
#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc
|
||||
#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc
|
||||
#define CGEADD_K gotoblas -> cgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -149,6 +149,7 @@
|
||||
#define DOMATCOPY_K_RN domatcopy_k_rn
|
||||
#define DOMATCOPY_K_CT domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT domatcopy_k_rt
|
||||
#define DGEADD_K dgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
@@ -267,6 +268,8 @@
|
||||
#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct
|
||||
#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt
|
||||
|
||||
#define DGEADD_K gotoblas -> dgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
#define DGEMM_NN dgemm_nn
|
||||
|
||||
9
common_i.h
Normal file
9
common_i.h
Normal file
@@ -0,0 +1,9 @@
|
||||
#ifndef COMMON_I_H
|
||||
#define COMMON_I_H
|
||||
|
||||
#ifndef DYNAMIC_ARCH
|
||||
#define IAXPYU_K iaxpy_k
|
||||
#else
|
||||
#error
|
||||
#endif
|
||||
#endif
|
||||
@@ -93,6 +93,7 @@ openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdo
|
||||
|
||||
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
void BLASFUNC(iaxpy) (blasint *, int *, int *, blasint *, int *, blasint *);
|
||||
void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *);
|
||||
void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *);
|
||||
@@ -754,6 +755,12 @@ void BLASFUNC(dimatcopy) (char *, char *, blasint *, blasint *, double *, do
|
||||
void BLASFUNC(cimatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, blasint *);
|
||||
void BLASFUNC(zimatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, blasint *);
|
||||
|
||||
void BLASFUNC(sgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
|
||||
void BLASFUNC(dgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
|
||||
void BLASFUNC(cgeadd) (blasint *, blasint *, float *, float *, blasint *, float *, float *, blasint*);
|
||||
void BLASFUNC(zgeadd) (blasint *, blasint *, double *, double *, blasint *, double *, double *, blasint*);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
|
||||
@@ -60,6 +60,8 @@ int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double,
|
||||
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
|
||||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
int iaxpy_k (BLASLONG, BLASLONG, BLASLONG, int,
|
||||
int *, BLASLONG, int *, BLASLONG, int *, BLASLONG);
|
||||
int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double,
|
||||
|
||||
@@ -1762,6 +1762,11 @@ int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, dou
|
||||
int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG);
|
||||
int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG);
|
||||
int cgeadd_k(BLASLONG, BLASLONG, float, float, float*, BLASLONG, float, float, float *, BLASLONG);
|
||||
int zgeadd_k(BLASLONG, BLASLONG, double,double, double*, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
|
||||
#ifdef __CUDACC__
|
||||
}
|
||||
|
||||
@@ -47,6 +47,10 @@
|
||||
#include "common_z.h"
|
||||
#include "common_x.h"
|
||||
|
||||
#ifdef INTEGER_PRECISION
|
||||
#include "common_i.h"
|
||||
#endif
|
||||
|
||||
#ifndef COMPLEX
|
||||
#ifdef XDOUBLE
|
||||
|
||||
@@ -634,7 +638,10 @@
|
||||
#define OMATCOPY_K_RN DOMATCOPY_K_RN
|
||||
#define OMATCOPY_K_CT DOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT DOMATCOPY_K_RT
|
||||
#define GEADD_K DGEADD_K
|
||||
|
||||
#elif defined(INTEGER)
|
||||
#define AXPYU_K IAXPYU_K
|
||||
#else
|
||||
|
||||
#define AMAX_K SAMAX_K
|
||||
@@ -932,6 +939,7 @@
|
||||
#define OMATCOPY_K_CT SOMATCOPY_K_CT
|
||||
#define OMATCOPY_K_RT SOMATCOPY_K_RT
|
||||
|
||||
#define GEADD_K SGEADD_K
|
||||
#endif
|
||||
#else
|
||||
#ifdef XDOUBLE
|
||||
@@ -1746,6 +1754,7 @@
|
||||
#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC
|
||||
#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC
|
||||
#define GEADD_K ZGEADD_K
|
||||
|
||||
#else
|
||||
|
||||
@@ -2159,6 +2168,8 @@
|
||||
#define OMATCOPY_K_CTC COMATCOPY_K_CTC
|
||||
#define OMATCOPY_K_RTC COMATCOPY_K_RTC
|
||||
|
||||
#define GEADD_K CGEADD_K
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -13,9 +13,10 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
@@ -27,7 +28,6 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************/
|
||||
|
||||
@@ -855,6 +855,10 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
||||
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
|
||||
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
|
||||
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
|
||||
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
|
||||
int (*zgeadd_k) (BLASLONG, BLASLONG, float, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
|
||||
} gotoblas_t;
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -13,9 +13,10 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
|
||||
@@ -153,6 +153,7 @@
|
||||
#define SOMATCOPY_K_CT somatcopy_k_ct
|
||||
#define SOMATCOPY_K_RT somatcopy_k_rt
|
||||
|
||||
#define SGEADD_K sgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
@@ -274,6 +275,7 @@
|
||||
#define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct
|
||||
#define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt
|
||||
|
||||
#define SGEADD_K gotoblas -> sgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -65,6 +65,7 @@ extern int blas_omp_linked;
|
||||
#define BLAS_XDOUBLE 0x0002U
|
||||
#define BLAS_REAL 0x0000U
|
||||
#define BLAS_COMPLEX 0x0004U
|
||||
#define BLAS_INTEGER 0x0008U
|
||||
|
||||
#define BLAS_TRANSA 0x0030U /* 2bit */
|
||||
#define BLAS_TRANSA_N 0x0000U
|
||||
|
||||
@@ -171,7 +171,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
#define MMXSTORE movd
|
||||
#endif
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
//Enable some optimazation for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
#endif
|
||||
|
||||
@@ -226,7 +226,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||
|
||||
#ifdef ASSEMBLER
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
//Enable some optimazation for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
#endif
|
||||
|
||||
@@ -220,6 +220,7 @@
|
||||
#define ZOMATCOPY_K_CTC zomatcopy_k_ctc
|
||||
#define ZOMATCOPY_K_RTC zomatcopy_k_rtc
|
||||
|
||||
#define ZGEADD_K zgeadd_k
|
||||
|
||||
#else
|
||||
|
||||
@@ -403,6 +404,8 @@
|
||||
#define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc
|
||||
#define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc
|
||||
|
||||
#define ZGEADD_K gotoblas -> zgeadd_k
|
||||
|
||||
#endif
|
||||
|
||||
#define ZGEMM_NN zgemm_nn
|
||||
|
||||
12
cpuid.h
12
cpuid.h
@@ -104,10 +104,12 @@
|
||||
#define CORE_ATOM 18
|
||||
#define CORE_NANO 19
|
||||
#define CORE_SANDYBRIDGE 20
|
||||
#define CORE_BOBCAT 21
|
||||
#define CORE_BULLDOZER 22
|
||||
#define CORE_BOBCAT 21
|
||||
#define CORE_BULLDOZER 22
|
||||
#define CORE_PILEDRIVER 23
|
||||
#define CORE_HASWELL 24
|
||||
#define CORE_HASWELL 24
|
||||
#define CORE_STEAMROLLER 25
|
||||
#define CORE_EXCAVATOR 26
|
||||
|
||||
#define HAVE_SSE (1 << 0)
|
||||
#define HAVE_SSE2 (1 << 1)
|
||||
@@ -200,6 +202,8 @@ typedef struct {
|
||||
#define CPUTYPE_BOBCAT 45
|
||||
#define CPUTYPE_BULLDOZER 46
|
||||
#define CPUTYPE_PILEDRIVER 47
|
||||
#define CPUTYPE_HASWELL 48
|
||||
#define CPUTYPE_HASWELL 48
|
||||
#define CPUTYPE_STEAMROLLER 49
|
||||
#define CPUTYPE_EXCAVATOR 50
|
||||
|
||||
#endif
|
||||
|
||||
95
cpuid_arm.c
95
cpuid_arm.c
@@ -30,16 +30,27 @@
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_ARMV6 1
|
||||
#define CPU_ARMV7 2
|
||||
#define CPU_CORTEXA15 3
|
||||
#define CPU_CORTEXA9 3
|
||||
#define CPU_CORTEXA15 4
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKOWN",
|
||||
"ARMV6",
|
||||
"ARMV7",
|
||||
"CORTEXA9",
|
||||
"CORTEXA15"
|
||||
};
|
||||
|
||||
|
||||
static char *cpuname_lower[] = {
|
||||
"unknown",
|
||||
"armv6",
|
||||
"armv7",
|
||||
"cortexa9",
|
||||
"cortexa15"
|
||||
};
|
||||
|
||||
|
||||
int get_feature(char *search)
|
||||
{
|
||||
|
||||
@@ -85,6 +96,29 @@ int detect(void)
|
||||
char buffer[512], *p;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("CPU part", buffer, 8))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
if(p != NULL) {
|
||||
if (strstr(p, "0xc09")) {
|
||||
return CPU_CORTEXA9;
|
||||
}
|
||||
if (strstr(p, "0xc0f")) {
|
||||
return CPU_CORTEXA15;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
p = (char *) NULL ;
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
@@ -142,21 +176,7 @@ void get_architecture(void)
|
||||
void get_subarchitecture(void)
|
||||
{
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("ARMV7");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("ARMV6");
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("UNKNOWN");
|
||||
break;
|
||||
}
|
||||
printf("%s", cpuname[d]);
|
||||
}
|
||||
|
||||
void get_subdirname(void)
|
||||
@@ -170,6 +190,36 @@ void get_cpuconfig(void)
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
case CPU_CORTEXA9:
|
||||
printf("#define CORTEXA9\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define HAVE_VFPV3\n");
|
||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
||||
if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 128\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
case CPU_CORTEXA15:
|
||||
printf("#define CORTEXA15\n");
|
||||
printf("#define HAVE_VFP\n");
|
||||
printf("#define HAVE_VFPV3\n");
|
||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n");
|
||||
if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 32\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_LINESIZE 32\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 128\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("#define ARMV7\n");
|
||||
@@ -206,18 +256,7 @@ void get_libname(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV7:
|
||||
printf("armv7\n");
|
||||
break;
|
||||
|
||||
case CPU_ARMV6:
|
||||
printf("armv6\n");
|
||||
break;
|
||||
|
||||
}
|
||||
printf("%s", cpuname_lower[d]);
|
||||
}
|
||||
|
||||
|
||||
|
||||
217
cpuid_arm64.c
Normal file
217
cpuid_arm64.c
Normal file
@@ -0,0 +1,217 @@
|
||||
/**************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define CPU_UNKNOWN 0
|
||||
#define CPU_ARMV8 1
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKOWN",
|
||||
"ARMV8"
|
||||
};
|
||||
|
||||
|
||||
int get_feature(char *search)
|
||||
{
|
||||
|
||||
#ifdef linux
|
||||
FILE *infile;
|
||||
char buffer[2048], *p,*t;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("Features", buffer, 8))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
|
||||
if( p == NULL ) return;
|
||||
|
||||
t = strtok(p," ");
|
||||
while( t = strtok(NULL," "))
|
||||
{
|
||||
if (!strcmp(t, search)) { return(1); }
|
||||
}
|
||||
|
||||
#endif
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
||||
int detect(void)
|
||||
{
|
||||
|
||||
#ifdef linux
|
||||
|
||||
FILE *infile;
|
||||
char buffer[512], *p;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
if(p != NULL)
|
||||
{
|
||||
|
||||
if (strstr(p, "AArch64"))
|
||||
{
|
||||
return CPU_ARMV8;
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
|
||||
char *get_corename(void)
|
||||
{
|
||||
return cpuname[detect()];
|
||||
}
|
||||
|
||||
void get_architecture(void)
|
||||
{
|
||||
printf("ARM");
|
||||
}
|
||||
|
||||
void get_subarchitecture(void)
|
||||
{
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV8:
|
||||
printf("ARMV8");
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("UNKNOWN");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void get_subdirname(void)
|
||||
{
|
||||
printf("arm64");
|
||||
}
|
||||
|
||||
void get_cpuconfig(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV8:
|
||||
printf("#define ARMV8\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 262144\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
printf("#define L2_ASSOCIATIVE 4\n");
|
||||
break;
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void get_libname(void)
|
||||
{
|
||||
|
||||
int d = detect();
|
||||
switch (d)
|
||||
{
|
||||
|
||||
case CPU_ARMV8:
|
||||
printf("armv8\n");
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void get_features(void)
|
||||
{
|
||||
|
||||
#ifdef linux
|
||||
FILE *infile;
|
||||
char buffer[2048], *p,*t;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile))
|
||||
{
|
||||
|
||||
if (!strncmp("Features", buffer, 8))
|
||||
{
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
fclose(infile);
|
||||
|
||||
|
||||
if( p == NULL ) return;
|
||||
|
||||
t = strtok(p," ");
|
||||
while( t = strtok(NULL," "))
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*****************************************************************************
|
||||
Copyright (c) 2011,2012 Lab of Parallel Software and Computational Science,ISCAS
|
||||
Copyright (c) 2011-2014, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -13,9 +13,10 @@ met:
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the ISCAS nor the names of its contributors may
|
||||
be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written
|
||||
permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
|
||||
152
cpuid_x86.c
152
cpuid_x86.c
@@ -853,11 +853,24 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
if (get_vendor() == VENDOR_INTEL) {
|
||||
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
|
||||
if (cpuid_level >= 0x80000006) {
|
||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
|
||||
if(L2.size<=0){
|
||||
//If we didn't detect L2 correctly before,
|
||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
L2.size = BITMASK(ecx, 16, 0xffff);
|
||||
L2.associative = BITMASK(ecx, 12, 0x0f);
|
||||
L2.linesize = BITMASK(ecx, 0, 0xff);
|
||||
L2.size = BITMASK(ecx, 16, 0xffff);
|
||||
L2.associative = BITMASK(ecx, 12, 0x0f);
|
||||
|
||||
switch (L2.associative){
|
||||
case 0x06:
|
||||
L2.associative = 8;
|
||||
break;
|
||||
case 0x08:
|
||||
L2.associative = 16;
|
||||
break;
|
||||
}
|
||||
|
||||
L2.linesize = BITMASK(ecx, 0, 0xff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -916,10 +929,22 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
|
||||
if (L2ITB.associative == 0xff) L2ITB.associative = 0;
|
||||
L2ITB.linesize = BITMASK(ebx, 0, 0xff);
|
||||
|
||||
L2.size = BITMASK(ecx, 16, 0xffff);
|
||||
L2.associative = BITMASK(ecx, 12, 0xf);
|
||||
if (L2.associative == 0xff) L2.associative = 0;
|
||||
L2.linesize = BITMASK(ecx, 0, 0xff);
|
||||
if(L2.size <= 0){
|
||||
//If we didn't detect L2 correctly before,
|
||||
L2.size = BITMASK(ecx, 16, 0xffff);
|
||||
L2.associative = BITMASK(ecx, 12, 0xf);
|
||||
switch (L2.associative){
|
||||
case 0x06:
|
||||
L2.associative = 8;
|
||||
break;
|
||||
case 0x08:
|
||||
L2.associative = 16;
|
||||
break;
|
||||
}
|
||||
|
||||
if (L2.associative == 0xff) L2.associative = 0;
|
||||
L2.linesize = BITMASK(ecx, 0, 0xff);
|
||||
}
|
||||
|
||||
L3.size = BITMASK(edx, 18, 0x3fff) * 512;
|
||||
L3.associative = BITMASK(edx, 12, 0xf);
|
||||
@@ -1073,6 +1098,16 @@ int get_cpuname(void){
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 13:
|
||||
//Broadwell
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
@@ -1087,11 +1122,36 @@ int get_cpuname(void){
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
case 7:
|
||||
case 15:
|
||||
//Broadwell
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
switch (model) {
|
||||
case 6:
|
||||
//Broadwell
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CPUTYPE_HASWELL;
|
||||
#else
|
||||
return CPUTYPE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CPUTYPE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 0x7:
|
||||
@@ -1137,6 +1197,21 @@ int get_cpuname(void){
|
||||
return CPUTYPE_PILEDRIVER;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
case 0:
|
||||
switch(exmodel){
|
||||
case 3:
|
||||
if(support_avx())
|
||||
return CPUTYPE_STEAMROLLER;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
|
||||
case 6:
|
||||
if(support_avx())
|
||||
return CPUTYPE_EXCAVATOR;
|
||||
else
|
||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
@@ -1265,6 +1340,8 @@ static char *cpuname[] = {
|
||||
"BULLDOZER",
|
||||
"PILEDRIVER",
|
||||
"HASWELL",
|
||||
"STEAMROLLER",
|
||||
"EXCAVATOR",
|
||||
};
|
||||
|
||||
static char *lowercpuname[] = {
|
||||
@@ -1316,6 +1393,8 @@ static char *lowercpuname[] = {
|
||||
"bulldozer",
|
||||
"piledriver",
|
||||
"haswell",
|
||||
"steamroller",
|
||||
"excavator",
|
||||
};
|
||||
|
||||
static char *corename[] = {
|
||||
@@ -1344,6 +1423,8 @@ static char *corename[] = {
|
||||
"BULLDOZER",
|
||||
"PILEDRIVER",
|
||||
"HASWELL",
|
||||
"STEAMROLLER",
|
||||
"EXCAVATOR",
|
||||
};
|
||||
|
||||
static char *corename_lower[] = {
|
||||
@@ -1372,6 +1453,8 @@ static char *corename_lower[] = {
|
||||
"bulldozer",
|
||||
"piledriver",
|
||||
"haswell",
|
||||
"steamroller",
|
||||
"excavator",
|
||||
};
|
||||
|
||||
|
||||
@@ -1490,6 +1573,16 @@ int get_coretype(void){
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
case 13:
|
||||
//broadwell
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
@@ -1504,11 +1597,36 @@ int get_coretype(void){
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
case 7:
|
||||
case 15:
|
||||
//broadwell
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
switch (model) {
|
||||
case 6:
|
||||
//broadwell
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
return CORE_HASWELL;
|
||||
#else
|
||||
return CORE_SANDYBRIDGE;
|
||||
#endif
|
||||
else
|
||||
return CORE_NEHALEM;
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -1537,7 +1655,25 @@ int get_coretype(void){
|
||||
return CORE_PILEDRIVER;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
|
||||
case 0:
|
||||
switch(exmodel){
|
||||
case 3:
|
||||
if(support_avx())
|
||||
return CORE_STEAMROLLER;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
|
||||
case 6:
|
||||
if(support_avx())
|
||||
return CORE_EXCAVATOR;
|
||||
else
|
||||
return CORE_BARCELONA; //OS don't support AVX.
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
}else return CORE_BARCELONA;
|
||||
}
|
||||
}
|
||||
|
||||
4
ctest.c
4
ctest.c
@@ -44,6 +44,10 @@ COMPILER_DEC
|
||||
COMPILER_GNU
|
||||
#endif
|
||||
|
||||
#if defined(__ANDROID__)
|
||||
OS_ANDROID
|
||||
#endif
|
||||
|
||||
#if defined(__linux__)
|
||||
OS_LINUX
|
||||
#endif
|
||||
|
||||
@@ -27,12 +27,18 @@ ctestl2o = c_cblas2.o c_c2chke.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
ctestl3o = c_cblas3.o c_c3chke.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
ctestl3o_3m = c_cblas3_3m.o c_c3chke_3m.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
ztestl1o = c_zblas1.o
|
||||
|
||||
ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
ztestl3o_3m = c_zblas3_3m.o c_z3chke_3m.o auxiliary.o c_xerbla.o constant.o
|
||||
|
||||
|
||||
|
||||
all :: all1 all2 all3
|
||||
|
||||
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
|
||||
@@ -115,8 +121,8 @@ xccblat2: $(ctestl2o) c_cblat2.o $(TOPDIR)/$(LIBNAME)
|
||||
xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xccblat3 c_cblat3.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
xccblat3_3m: $(ctestl3o) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
# Double complex
|
||||
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME)
|
||||
@@ -127,8 +133,8 @@ xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xzcblat3 c_zblat3.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
|
||||
xzcblat3_3m: $(ztestl3o) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
|
||||
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
|
||||
include $(TOPDIR)/Makefile.tail
|
||||
|
||||
230
ctest/c_c3chke.c
230
ctest/c_c3chke.c
@@ -46,235 +46,7 @@ void F77_c3chke(char * rout) {
|
||||
}
|
||||
|
||||
|
||||
if (strncmp( sf,"cblas_cgemm3m" ,13)==0) {
|
||||
cblas_rout = "cblas_cgemm3" ;
|
||||
|
||||
cblas_info = 1;
|
||||
cblas_cgemm3m( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_cgemm3m( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_cgemm3m( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 1;
|
||||
cblas_cgemm3m( INVALID, CblasTrans, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, INVALID, CblasNoTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 2; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, INVALID, CblasTrans, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 3; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, INVALID, 0, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = FALSE;
|
||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 4; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 5; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 6; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, INVALID,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 2 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 9; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 2, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 2, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 11; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 2, BETA, C, 1 );
|
||||
chkxer();
|
||||
cblas_info = 14; RowMajorStrg = TRUE;
|
||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 2, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
chkxer();
|
||||
|
||||
} else if (strncmp( sf,"cblas_cgemm" ,11)==0) {
|
||||
if (strncmp( sf,"cblas_cgemm" ,11)==0) {
|
||||
cblas_rout = "cblas_cgemm" ;
|
||||
|
||||
|
||||
|
||||
1936
ctest/c_c3chke_3m.c
Normal file
1936
ctest/c_c3chke_3m.c
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user