Merge branch 'release-0.2.16'
This commit is contained in:
commit
fced5744fb
|
@ -68,3 +68,4 @@ test/zblat2
|
||||||
test/zblat3
|
test/zblat3
|
||||||
build
|
build
|
||||||
build.*
|
build.*
|
||||||
|
*.swp
|
||||||
|
|
|
@ -24,7 +24,12 @@ before_install:
|
||||||
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
|
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi
|
||||||
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
|
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi
|
||||||
|
|
||||||
script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
|
script:
|
||||||
|
- set -e
|
||||||
|
- make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE
|
||||||
|
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
|
||||||
|
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
|
||||||
|
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi
|
||||||
|
|
||||||
# whitelist
|
# whitelist
|
||||||
branches:
|
branches:
|
||||||
|
|
|
@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.4)
|
||||||
project(OpenBLAS)
|
project(OpenBLAS)
|
||||||
set(OpenBLAS_MAJOR_VERSION 0)
|
set(OpenBLAS_MAJOR_VERSION 0)
|
||||||
set(OpenBLAS_MINOR_VERSION 2)
|
set(OpenBLAS_MINOR_VERSION 2)
|
||||||
set(OpenBLAS_PATCH_VERSION 14)
|
set(OpenBLAS_PATCH_VERSION 16)
|
||||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||||
|
|
||||||
enable_language(ASM)
|
enable_language(ASM)
|
||||||
|
@ -54,10 +54,6 @@ if (NOT DYNAMIC_ARCH)
|
||||||
list(APPEND BLASDIRS kernel)
|
list(APPEND BLASDIRS kernel)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (DEFINED UTEST_CHECK)
|
|
||||||
set(SANITY_CHECK 1)
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
if (DEFINED SANITY_CHECK)
|
if (DEFINED SANITY_CHECK)
|
||||||
list(APPEND BLASDIRS reference)
|
list(APPEND BLASDIRS reference)
|
||||||
endif ()
|
endif ()
|
||||||
|
@ -110,6 +106,10 @@ if (${NO_STATIC} AND ${NO_SHARED})
|
||||||
message(FATAL_ERROR "Neither static nor shared are enabled.")
|
message(FATAL_ERROR "Neither static nor shared are enabled.")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
#Set default output directory
|
||||||
|
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
|
||||||
|
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib )
|
||||||
|
|
||||||
# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
|
# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
|
||||||
set(TARGET_OBJS "")
|
set(TARGET_OBJS "")
|
||||||
foreach (SUBDIR ${SUBDIRS})
|
foreach (SUBDIR ${SUBDIRS})
|
||||||
|
@ -139,6 +139,17 @@ add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET
|
||||||
|
|
||||||
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
|
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")
|
||||||
|
|
||||||
|
# Set output for libopenblas
|
||||||
|
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||||
|
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
|
||||||
|
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
|
||||||
|
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
|
||||||
|
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
|
||||||
|
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib)
|
||||||
|
endforeach()
|
||||||
|
|
||||||
|
enable_testing()
|
||||||
|
add_subdirectory(utest)
|
||||||
|
|
||||||
if(NOT MSVC)
|
if(NOT MSVC)
|
||||||
#only build shared library for MSVC
|
#only build shared library for MSVC
|
||||||
|
@ -152,7 +163,6 @@ target_link_libraries(${OpenBLAS_LIBNAME}_static pthread)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
#build test and ctest
|
#build test and ctest
|
||||||
enable_testing()
|
|
||||||
add_subdirectory(test)
|
add_subdirectory(test)
|
||||||
if(NOT NO_CBLAS)
|
if(NOT NO_CBLAS)
|
||||||
add_subdirectory(ctest)
|
add_subdirectory(ctest)
|
||||||
|
|
|
@ -121,6 +121,17 @@ In chronological order:
|
||||||
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
|
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
|
||||||
ARMv8 support.
|
ARMv8 support.
|
||||||
|
|
||||||
|
* Jerome Robert <jeromerobert@gmx.com>
|
||||||
|
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478)
|
||||||
|
* [2015-12-23] `stack_check` in `gemv.c` (bug #722)
|
||||||
|
* [2015-12-28] Allow forcing the number of parallel make jobs
|
||||||
|
* [2015-12-28] Fix detection of AMD E2-3200
|
||||||
|
* [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what is expected
|
||||||
|
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731)
|
||||||
|
* [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742)
|
||||||
|
* [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760)
|
||||||
|
* [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727)
|
||||||
|
|
||||||
* Dan Kortschak
|
* Dan Kortschak
|
||||||
* [2015-01-07] Added test for drotmg bug #484.
|
* [2015-01-07] Added test for drotmg bug #484.
|
||||||
|
|
||||||
|
@ -130,5 +141,11 @@ In chronological order:
|
||||||
* Martin Koehler <https://github.com/grisuthedragon/>
|
* Martin Koehler <https://github.com/grisuthedragon/>
|
||||||
* [2015-09-07] Improved imatcopy
|
* [2015-09-07] Improved imatcopy
|
||||||
|
|
||||||
|
* Ashwin Sekhar T K <https://github.com/ashwinyes/>
|
||||||
|
* [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8)
|
||||||
|
* [2015-11-20] lapack-test fixes for Cortex-A57
|
||||||
|
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57
|
||||||
|
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57
|
||||||
|
|
||||||
* [Your name or handle] <[email or website]>
|
* [Your name or handle] <[email or website]>
|
||||||
* [Date] [Brief summary of your changes]
|
* [Date] [Brief summary of your changes]
|
||||||
|
|
|
@ -1,4 +1,57 @@
|
||||||
OpenBLAS ChangeLog
|
OpenBLAS ChangeLog
|
||||||
|
====================================================================
|
||||||
|
Version 0.2.16
|
||||||
|
15-Mar-2016
|
||||||
|
common:
|
||||||
|
* Avoid potential getenv segfault. (#716)
|
||||||
|
* Import LAPACK svn bugfix #142-#147,#150-#155
|
||||||
|
|
||||||
|
x86/x86_64:
|
||||||
|
* Optimize c/zgemv for AMD Bulldozer, Piledriver, Steamroller
|
||||||
|
* Fix bug with scipy linalg test.
|
||||||
|
|
||||||
|
ARM:
|
||||||
|
* Improve DGEMM for ARM Cortex-A57. (Thanks, Ashwin Sekhar T K)
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Optimize D and Z BLAS3 functions for Power8.
|
||||||
|
|
||||||
|
====================================================================
|
||||||
|
Version 0.2.16.rc1
|
||||||
|
23-Feb-2016
|
||||||
|
common:
|
||||||
|
* Upgrade LAPACK to 3.6.0 version.
|
||||||
|
Add BUILD_LAPACK_DEPRECATED option in Makefile.rule to build
|
||||||
|
LAPACK deprecated functions.
|
||||||
|
* Add MAKE_NB_JOBS option in Makefile.
|
||||||
|
Force number of make jobs. This is particularly
|
||||||
|
useful when using distcc. (#735. Thanks, Jerome Robert.)
|
||||||
|
* Redesign unit test. Run unit/regression test at every build (Travis-CI and Appveyor).
|
||||||
|
* Disable multi-threading for small size swap and ger. (#744. Thanks, Jerome Robert)
|
||||||
|
* Improve small zger, zgemv, ztrmv using stack allocation (#727. Thanks, Jerome Robert)
|
||||||
|
* Let openblas_get_num_threads return the number of active threads.
|
||||||
|
(#760. Thanks, Jerome Robert)
|
||||||
|
* Support illumos(OmniOS). (#749. Thanks, Lauri Tirkkonen)
|
||||||
|
* Fix LAPACK Dormbr, Dormlq bug. (#711, #713. Thanks, Brendan Tracey)
|
||||||
|
* Update scipy benchmark script. (#745. Thanks, John Kirkham)
|
||||||
|
|
||||||
|
x86/x86_64:
|
||||||
|
* Optimize trsm kernels for AMD Bulldozer, Piledriver, Steamroller.
|
||||||
|
* Detect Intel Avoton.
|
||||||
|
* Detect AMD Trinity, Richland, E2-3200.
|
||||||
|
* Fix gemv performance bug on Mac OSX Intel Haswell.
|
||||||
|
* Fix some bugs with CMake and Visual Studio
|
||||||
|
|
||||||
|
ARM:
|
||||||
|
* Support and optimize Cortex-A57 AArch64.
|
||||||
|
(#686. Thanks, Ashwin Sekhar TK)
|
||||||
|
* Fix Android build on ARMV7 (#778. Thanks, Paul Mustiere)
|
||||||
|
* Update ARMV6 kernels.
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Fix detection of POWER architecture
|
||||||
|
(#684. Thanks, Sebastien Villemot)
|
||||||
|
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.2.15
|
Version 0.2.15
|
||||||
27-Oct-2015
|
27-Oct-2015
|
||||||
|
|
37
Makefile
37
Makefile
|
@ -7,10 +7,6 @@ ifneq ($(DYNAMIC_ARCH), 1)
|
||||||
BLASDIRS += kernel
|
BLASDIRS += kernel
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef UTEST_CHECK
|
|
||||||
SANITY_CHECK = 1
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef SANITY_CHECK
|
ifdef SANITY_CHECK
|
||||||
BLASDIRS += reference
|
BLASDIRS += reference
|
||||||
endif
|
endif
|
||||||
|
@ -85,22 +81,22 @@ endif
|
||||||
|
|
||||||
shared :
|
shared :
|
||||||
ifndef NO_SHARED
|
ifndef NO_SHARED
|
||||||
ifeq ($(OSNAME), Linux)
|
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
|
||||||
@$(MAKE) -C exports so
|
@$(MAKE) -C exports so
|
||||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), FreeBSD)
|
ifeq ($(OSNAME), FreeBSD)
|
||||||
@$(MAKE) -C exports so
|
@$(MAKE) -C exports so
|
||||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), NetBSD)
|
ifeq ($(OSNAME), NetBSD)
|
||||||
@$(MAKE) -C exports so
|
@$(MAKE) -C exports so
|
||||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), Darwin)
|
ifeq ($(OSNAME), Darwin)
|
||||||
@$(MAKE) -C exports dyn
|
@$(MAKE) -C exports dyn
|
||||||
@-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||||
endif
|
endif
|
||||||
ifeq ($(OSNAME), WINNT)
|
ifeq ($(OSNAME), WINNT)
|
||||||
@$(MAKE) -C exports dll
|
@$(MAKE) -C exports dll
|
||||||
|
@ -117,10 +113,8 @@ ifndef CROSS
|
||||||
touch $(LIBNAME)
|
touch $(LIBNAME)
|
||||||
ifndef NO_FBLAS
|
ifndef NO_FBLAS
|
||||||
$(MAKE) -C test all
|
$(MAKE) -C test all
|
||||||
ifdef UTEST_CHECK
|
|
||||||
$(MAKE) -C utest all
|
$(MAKE) -C utest all
|
||||||
endif
|
endif
|
||||||
endif
|
|
||||||
ifndef NO_CBLAS
|
ifndef NO_CBLAS
|
||||||
$(MAKE) -C ctest all
|
$(MAKE) -C ctest all
|
||||||
endif
|
endif
|
||||||
|
@ -249,16 +243,23 @@ ifndef NOFORTRAN
|
||||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
ifeq ($(FC), gfortran)
|
ifeq ($(F_COMPILER), GFORTRAN)
|
||||||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
ifdef SMP
|
ifdef SMP
|
||||||
|
ifeq ($(OSNAME), WINNT)
|
||||||
|
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
else
|
||||||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
endif
|
||||||
|
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
|
||||||
|
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
endif
|
endif
|
||||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
endif
|
endif
|
||||||
|
@ -288,7 +289,17 @@ endif
|
||||||
lapack-test :
|
lapack-test :
|
||||||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
||||||
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||||
|
ifneq ($(CROSS), 1)
|
||||||
|
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||||
|
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||||
|
endif
|
||||||
|
|
||||||
|
lapack-runtest:
|
||||||
|
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||||
|
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||||
|
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||||
|
|
||||||
|
|
||||||
blas-test:
|
blas-test:
|
||||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
|
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
|
||||||
|
|
|
@ -11,8 +11,8 @@ endif
|
||||||
|
|
||||||
ifeq ($(CORE), ARMV7)
|
ifeq ($(CORE), ARMV7)
|
||||||
ifeq ($(OSNAME), Android)
|
ifeq ($(OSNAME), Android)
|
||||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
|
||||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a
|
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch
|
||||||
else
|
else
|
||||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a
|
||||||
|
@ -29,5 +29,3 @@ ifeq ($(CORE), ARMV5)
|
||||||
CCOMMON_OPT += -marm -march=armv5
|
CCOMMON_OPT += -marm -march=armv5
|
||||||
FCOMMON_OPT += -marm -march=armv5
|
FCOMMON_OPT += -marm -march=armv5
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -4,4 +4,8 @@ CCOMMON_OPT += -march=armv8-a
|
||||||
FCOMMON_OPT += -march=armv8-a
|
FCOMMON_OPT += -march=armv8-a
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), CORTEXA57)
|
||||||
|
CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
|
||||||
|
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,7 @@ install : lib.grd
|
||||||
#for inc
|
#for inc
|
||||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||||
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||||
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
@$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||||
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h
|
||||||
|
@ -48,10 +48,10 @@ endif
|
||||||
|
|
||||||
ifndef NO_LAPACKE
|
ifndef NO_LAPACKE
|
||||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h
|
||||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h
|
||||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h
|
||||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h
|
||||||
endif
|
endif
|
||||||
|
|
||||||
#for install static library
|
#for install static library
|
||||||
|
@ -64,7 +64,7 @@ endif
|
||||||
#for install shared library
|
#for install shared library
|
||||||
ifndef NO_SHARED
|
ifndef NO_SHARED
|
||||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||||
ifeq ($(OSNAME), Linux)
|
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
|
||||||
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \
|
||||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
# This library's version
|
# This library's version
|
||||||
VERSION = 0.2.15
|
VERSION = 0.2.16
|
||||||
|
|
||||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||||
|
@ -79,6 +79,9 @@ VERSION = 0.2.15
|
||||||
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
|
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in.
|
||||||
# NO_LAPACKE = 1
|
# NO_LAPACKE = 1
|
||||||
|
|
||||||
|
# Build LAPACK Deprecated functions since LAPACK 3.6.0
|
||||||
|
# BUILD_LAPACK_DEPRECATED = 1
|
||||||
|
|
||||||
# If you want to use legacy threaded Level 3 implementation.
|
# If you want to use legacy threaded Level 3 implementation.
|
||||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||||
|
|
||||||
|
@ -108,6 +111,10 @@ NO_AFFINITY = 1
|
||||||
# Don't use parallel make.
|
# Don't use parallel make.
|
||||||
# NO_PARALLEL_MAKE = 1
|
# NO_PARALLEL_MAKE = 1
|
||||||
|
|
||||||
|
# Force number of make jobs. The default is the number of logical CPUs of the host.
|
||||||
|
# This is particularly useful when using distcc
|
||||||
|
# MAKE_NB_JOBS = 2
|
||||||
|
|
||||||
# If you would like to know minute performance report of GotoBLAS.
|
# If you would like to know minute performance report of GotoBLAS.
|
||||||
# FUNCTION_PROFILE = 1
|
# FUNCTION_PROFILE = 1
|
||||||
|
|
||||||
|
@ -138,10 +145,6 @@ NO_AFFINITY = 1
|
||||||
# slow (Not implemented yet).
|
# slow (Not implemented yet).
|
||||||
# SANITY_CHECK = 1
|
# SANITY_CHECK = 1
|
||||||
|
|
||||||
# Run testcases in utest/ . When you enable UTEST_CHECK, it would enable
|
|
||||||
# SANITY_CHECK to compare the result with reference BLAS.
|
|
||||||
# UTEST_CHECK = 1
|
|
||||||
|
|
||||||
# The installation directory.
|
# The installation directory.
|
||||||
# PREFIX = /opt/OpenBLAS
|
# PREFIX = /opt/OpenBLAS
|
||||||
|
|
||||||
|
@ -159,10 +162,11 @@ COMMON_PROF = -pg
|
||||||
# Build Debug version
|
# Build Debug version
|
||||||
# DEBUG = 1
|
# DEBUG = 1
|
||||||
|
|
||||||
# Improve GEMV and GER for small matrices by stack allocation.
|
# Set maximum stack allocation.
|
||||||
# For details, https://github.com/xianyi/OpenBLAS/pull/482
|
# The default value is 2048. 0 disables stack allocation and may reduce GER and GEMV
|
||||||
|
# performance. For details, https://github.com/xianyi/OpenBLAS/pull/482
|
||||||
#
|
#
|
||||||
MAX_STACK_ALLOC=2048
|
# MAX_STACK_ALLOC = 0
|
||||||
|
|
||||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||||
# Avoid conflicts with other BLAS libraries, especially when using
|
# Avoid conflicts with other BLAS libraries, especially when using
|
||||||
|
|
|
@ -139,6 +139,10 @@ NO_PARALLEL_MAKE=0
|
||||||
endif
|
endif
|
||||||
GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE)
|
GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE)
|
||||||
|
|
||||||
|
ifdef MAKE_NB_JOBS
|
||||||
|
GETARCH_FLAGS += -DMAKE_NB_JOBS=$(MAKE_NB_JOBS)
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(HOSTCC), loongcc)
|
ifeq ($(HOSTCC), loongcc)
|
||||||
GETARCH_FLAGS += -static
|
GETARCH_FLAGS += -static
|
||||||
endif
|
endif
|
||||||
|
@ -292,12 +296,14 @@ endif
|
||||||
ifneq ($(OSNAME), WINNT)
|
ifneq ($(OSNAME), WINNT)
|
||||||
ifneq ($(OSNAME), CYGWIN_NT)
|
ifneq ($(OSNAME), CYGWIN_NT)
|
||||||
ifneq ($(OSNAME), Interix)
|
ifneq ($(OSNAME), Interix)
|
||||||
|
ifneq ($(OSNAME), Android)
|
||||||
ifdef SMP
|
ifdef SMP
|
||||||
EXTRALIB += -lpthread
|
EXTRALIB += -lpthread
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
# ifeq logical or
|
# ifeq logical or
|
||||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
|
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix))
|
||||||
|
@ -324,7 +330,8 @@ ifdef SANITY_CHECK
|
||||||
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
|
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef MAX_STACK_ALLOC
|
MAX_STACK_ALLOC ?= 2048
|
||||||
|
ifneq ($(MAX_STACK_ALLOC), 0)
|
||||||
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
|
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -374,7 +381,7 @@ FCOMMON_OPT += -m128bit-long-double
|
||||||
endif
|
endif
|
||||||
ifeq ($(C_COMPILER), CLANG)
|
ifeq ($(C_COMPILER), CLANG)
|
||||||
EXPRECISION = 1
|
EXPRECISION = 1
|
||||||
CCOMMON_OPT += -DEXPRECISION
|
CCOMMON_OPT += -DEXPRECISION
|
||||||
FCOMMON_OPT += -m128bit-long-double
|
FCOMMON_OPT += -m128bit-long-double
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
@ -388,7 +395,7 @@ endif
|
||||||
|
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
|
|
||||||
#check
|
#check
|
||||||
ifeq ($(USE_THREAD), 0)
|
ifeq ($(USE_THREAD), 0)
|
||||||
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
|
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.)
|
||||||
endif
|
endif
|
||||||
|
@ -952,17 +959,18 @@ ifeq ($(OSNAME), SunOS)
|
||||||
TAR = gtar
|
TAR = gtar
|
||||||
PATCH = gpatch
|
PATCH = gpatch
|
||||||
GREP = ggrep
|
GREP = ggrep
|
||||||
|
AWK = nawk
|
||||||
else
|
else
|
||||||
TAR = tar
|
TAR = tar
|
||||||
PATCH = patch
|
PATCH = patch
|
||||||
GREP = grep
|
GREP = grep
|
||||||
|
AWK = awk
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef MD5SUM
|
ifndef MD5SUM
|
||||||
MD5SUM = md5sum
|
MD5SUM = md5sum
|
||||||
endif
|
endif
|
||||||
|
|
||||||
AWK = awk
|
|
||||||
|
|
||||||
REVISION = -r$(VERSION)
|
REVISION = -r$(VERSION)
|
||||||
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))
|
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION)))
|
||||||
|
@ -971,16 +979,25 @@ ifeq ($(DEBUG), 1)
|
||||||
COMMON_OPT += -g
|
COMMON_OPT += -g
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(DEBUG), 1)
|
||||||
|
FCOMMON_OPT += -g
|
||||||
|
endif
|
||||||
|
|
||||||
ifndef COMMON_OPT
|
ifndef COMMON_OPT
|
||||||
COMMON_OPT = -O2
|
COMMON_OPT = -O2
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifndef FCOMMON_OPT
|
||||||
|
FCOMMON_OPT = -O2 -frecursive
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||||
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||||
|
|
||||||
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
|
override FFLAGS += $(FCOMMON_OPT)
|
||||||
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
|
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
|
||||||
#MAKEOVERRIDES =
|
#MAKEOVERRIDES =
|
||||||
|
|
||||||
#For LAPACK Fortran codes.
|
#For LAPACK Fortran codes.
|
||||||
|
@ -1170,4 +1187,3 @@ SUNPATH = /opt/sunstudio12.1
|
||||||
else
|
else
|
||||||
SUNPATH = /opt/SUNWspro
|
SUNPATH = /opt/SUNWspro
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
|
@ -75,10 +75,11 @@ Please read GotoBLAS_01Readme.txt
|
||||||
|
|
||||||
#### ARM64:
|
#### ARM64:
|
||||||
- **ARMV8**: Experimental
|
- **ARMV8**: Experimental
|
||||||
|
- **ARM Cortex-A57**: Experimental
|
||||||
|
|
||||||
### Support OS:
|
### Support OS:
|
||||||
- **GNU/Linux**
|
- **GNU/Linux**
|
||||||
- **MingWin/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
- **MingWin or Visual Studio(CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
|
||||||
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X.
|
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X.
|
||||||
- **FreeBSD**: Supported by community. We didn't test the library on this OS.
|
- **FreeBSD**: Supported by community. We didn't test the library on this OS.
|
||||||
|
|
||||||
|
|
|
@ -74,3 +74,5 @@ ARMV5
|
||||||
|
|
||||||
7.ARM 64-bit CPU:
|
7.ARM 64-bit CPU:
|
||||||
ARMV8
|
ARMV8
|
||||||
|
CORTEXA57
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,199 @@
|
||||||
|
# Notes on OpenBLAS usage
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
#### Program is Terminated. Because you tried to allocate too many memory regions
|
||||||
|
|
||||||
|
In OpenBLAS, we manage a pool of memory buffers and allocate the number of
|
||||||
|
buffers as the following.
|
||||||
|
```
|
||||||
|
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
|
||||||
|
```
|
||||||
|
This error indicates that the program exceeded the number of buffers.
|
||||||
|
|
||||||
|
Please build OpenBLAS with larger `NUM_THREADS`. For example, `make
|
||||||
|
NUM_THREADS=32` or `make NUM_THREADS=64`. In `Makefile.system`, we will set
|
||||||
|
`MAX_CPU_NUMBER=NUM_THREADS`.
|
||||||
|
|
||||||
|
#### How can I use OpenBLAS in multi-threaded applications?
|
||||||
|
|
||||||
|
If your application is already multi-threaded, it will conflict with OpenBLAS
|
||||||
|
multi-threading. Thus, you must set OpenBLAS to use single thread in any of the
|
||||||
|
following ways:
|
||||||
|
|
||||||
|
* `export OPENBLAS_NUM_THREADS=1` in the environment variables.
|
||||||
|
* Call `openblas_set_num_threads(1)` in the application at runtime.
|
||||||
|
* Build OpenBLAS single thread version, e.g. `make USE_THREAD=0`
|
||||||
|
|
||||||
|
If the application is parallelized by OpenMP, please use OpenBLAS built with
|
||||||
|
`USE_OPENMP=1`
|
||||||
|
|
||||||
|
#### How to choose TARGET manually at runtime when compiled with DYNAMIC_ARCH
|
||||||
|
|
||||||
|
The environment variable which controls the kernel selection is
|
||||||
|
`OPENBLAS_CORETYPE` (see `driver/others/dynamic.c`) e.g. `export
|
||||||
|
OPENBLAS_CORETYPE=Haswell` and the function `char* openblas_get_corename()`
|
||||||
|
returns the used target.
|
||||||
|
|
||||||
|
#### How could I disable OpenBLAS threading affinity at runtime?
|
||||||
|
|
||||||
|
You can define the `OPENBLAS_MAIN_FREE` or `GOTOBLAS_MAIN_FREE` environment
|
||||||
|
variable to disable threading affinity at runtime. For example, before
|
||||||
|
running,
|
||||||
|
```
|
||||||
|
export OPENBLAS_MAIN_FREE=1
|
||||||
|
```
|
||||||
|
|
||||||
|
Alternatively, you can disable the affinity feature by enabling `NO_AFFINITY=1`
|
||||||
|
in `Makefile.rule`.
|
||||||
|
|
||||||
|
## Linking with the library
|
||||||
|
|
||||||
|
* Link with shared library
|
||||||
|
|
||||||
|
`gcc -o test test.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas`
|
||||||
|
|
||||||
|
If the library is multithreaded, please add `-lpthread`. If the library
|
||||||
|
contains LAPACK functions, please add `-lgfortran` or other Fortran libs.
|
||||||
|
|
||||||
|
* Link with static library
|
||||||
|
|
||||||
|
`gcc -o test test.c /your/path/libopenblas.a`
|
||||||
|
|
||||||
|
You can download `test.c` from https://gist.github.com/xianyi/5780018
|
||||||
|
|
||||||
|
On Linux, if OpenBLAS was compiled with threading support (`USE_THREAD=1` by
|
||||||
|
default), custom programs statically linked against `libopenblas.a` should also
|
||||||
|
link with the pthread library e.g.:
|
||||||
|
|
||||||
|
```
|
||||||
|
gcc -static -I/opt/OpenBLAS/include -L/opt/OpenBLAS/lib -o my_program my_program.c -lopenblas -lpthread
|
||||||
|
```
|
||||||
|
|
||||||
|
Failing to add the `-lpthread` flag will cause errors such as:
|
||||||
|
|
||||||
|
```
|
||||||
|
/opt/OpenBLAS/libopenblas.a(memory.o): In function `_touch_memory':
|
||||||
|
memory.c:(.text+0x15): undefined reference to `pthread_mutex_lock'
|
||||||
|
memory.c:(.text+0x41): undefined reference to `pthread_mutex_unlock'
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
## Code examples
|
||||||
|
|
||||||
|
#### Call CBLAS interface
|
||||||
|
This example shows calling cblas_dgemm in C. https://gist.github.com/xianyi/6930656
|
||||||
|
```
|
||||||
|
#include <cblas.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
void main()
|
||||||
|
{
|
||||||
|
int i=0;
|
||||||
|
double A[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
|
||||||
|
double B[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
|
||||||
|
double C[9] = {.5,.5,.5,.5,.5,.5,.5,.5,.5};
|
||||||
|
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,3,3,2,1,A, 3, B, 3,2,C,3);
|
||||||
|
|
||||||
|
for(i=0; i<9; i++)
|
||||||
|
printf("%lf ", C[i]);
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
```
|
||||||
|
`gcc -o test_cblas_open test_cblas_dgemm.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas -lpthread -lgfortran`
|
||||||
|
|
||||||
|
#### Call BLAS Fortran interface
|
||||||
|
|
||||||
|
This example shows calling dgemm Fortran interface in C. https://gist.github.com/xianyi/5780018
|
||||||
|
|
||||||
|
```
|
||||||
|
#include "stdio.h"
|
||||||
|
#include "stdlib.h"
|
||||||
|
#include "sys/time.h"
|
||||||
|
#include "time.h"
|
||||||
|
|
||||||
|
extern void dgemm_(char*, char*, int*, int*,int*, double*, double*, int*, double*, int*, double*, double*, int*);
|
||||||
|
|
||||||
|
/*
 * Times a single dgemm_ call (Fortran BLAS interface) and appends the result
 * to timeDGEMM.txt in the current directory.
 *
 * Usage: ./time_dgemm <m> <n> <k>
 *
 * A is m*k, B is k*n and C is m*n doubles; all three are filled with the
 * repeating pattern 1, 2, 3. Both operands are passed untransposed ('N').
 *
 * Returns 0 on success, 1 when the arguments are missing or not positive,
 * when a buffer cannot be allocated, or when the log file cannot be opened.
 */
int main(int argc, char* argv[])
{
  int i;
  printf("test!\n");
  if(argc<4){
    printf("Input Error\n");
    return 1;
  }

  int m = atoi(argv[1]);
  int n = atoi(argv[2]);
  int k = atoi(argv[3]);
  /* atoi yields 0 for non-numeric text; dgemm_ also needs lda/ldb/ldc >= 1. */
  if(m <= 0 || n <= 0 || k <= 0){
    printf("Input Error\n");
    return 1;
  }
  int sizeofa = m * k;
  int sizeofb = k * n;
  int sizeofc = m * n;
  char ta = 'N';
  char tb = 'N';
  double alpha = 1.2;
  double beta = 0.001;

  struct timeval start,finish;
  double duration;

  double* A = (double*)malloc(sizeof(double) * sizeofa);
  double* B = (double*)malloc(sizeof(double) * sizeofb);
  double* C = (double*)malloc(sizeof(double) * sizeofc);
  if(A == NULL || B == NULL || C == NULL){
    printf("Out of memory\n");
    free(A);
    free(B);
    free(C);
    return 1;
  }

  srand((unsigned)time(NULL));

  for (i=0; i<sizeofa; i++)
    A[i] = i%3+1;//(rand()%100)/10.0;

  for (i=0; i<sizeofb; i++)
    B[i] = i%3+1;//(rand()%100)/10.0;

  for (i=0; i<sizeofc; i++)
    C[i] = i%3+1;//(rand()%100)/10.0;

  printf("m=%d,n=%d,k=%d,alpha=%lf,beta=%lf,sizeofc=%d\n",m,n,k,alpha,beta,sizeofc);
  gettimeofday(&start, NULL);
  dgemm_(&ta, &tb, &m, &n, &k, &alpha, A, &m, B, &k, &beta, C, &m);
  gettimeofday(&finish, NULL);

  /* Elapsed wall-clock time in seconds. */
  duration = ((double)(finish.tv_sec-start.tv_sec)*1000000 + (double)(finish.tv_usec-start.tv_usec)) / 1000000;
  /* 2*m*n*k floating-point operations, scaled by 1e-6: this is MFLOPS. */
  double mflops = 2.0 * m *n*k;
  mflops = mflops/duration*1.0e-6;

  FILE *fp;
  fp = fopen("timeDGEMM.txt", "a");
  if(fp == NULL){
    perror("timeDGEMM.txt");
    free(A);
    free(B);
    free(C);
    return 1;
  }
  fprintf(fp, "%dx%dx%d\t%lf s\t%lf MFLOPS\n", m, n, k, duration, mflops);
  fclose(fp);

  free(A);
  free(B);
  free(C);
  return 0;
}
|
||||||
|
```
|
||||||
|
|
||||||
|
` gcc -o time_dgemm time_dgemm.c /your/path/libopenblas.a`
|
||||||
|
|
||||||
|
` ./time_dgemm <m> <n> <k> `
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
* Please read the [FAQ](https://github.com/xianyi/OpenBLAS/wiki/Faq) first.
|
||||||
|
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
|
||||||
|
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. Clang 3.0 generates incorrect AVX binary code.
|
||||||
|
* The number of CPUs/cores should be less than or equal to 256. On Linux x86_64 (amd64), there is experimental support for up to 1024 CPUs/cores and 128 NUMA nodes if you build the library with BIGNUMA=1.
|
||||||
|
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting out the line NO_AFFINITY=1 in Makefile.rule. However, this may cause [a conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
|
||||||
|
* On Loongson 3A, `make test` fails because of a pthread_create error (the error code is EAGAIN). However, the same test case runs fine when you launch it directly from the shell.
|
||||||
|
|
||||||
|
## BLAS reference manual
|
||||||
|
If you want to understand every BLAS function and definition, please read
|
||||||
|
[Intel MKL reference manual](https://software.intel.com/sites/products/documentation/doclib/iss/2013/mkl/mklman/GUID-F7ED9FB8-6663-4F44-A62B-61B63C4F0491.htm)
|
||||||
|
or [netlib.org](http://netlib.org/blas/)
|
||||||
|
|
||||||
|
Here are [OpenBLAS extension functions](https://github.com/xianyi/OpenBLAS/wiki/OpenBLAS-Extensions)
|
||||||
|
|
||||||
|
## How to reference OpenBLAS.
|
||||||
|
|
||||||
|
You can reference our [papers](https://github.com/xianyi/OpenBLAS/wiki/publications).
|
||||||
|
|
||||||
|
Alternatively, you can cite the OpenBLAS homepage http://www.openblas.net directly.
|
||||||
|
|
|
@ -39,4 +39,6 @@ before_build:
|
||||||
- cmake -G "Visual Studio 12 Win64" .
|
- cmake -G "Visual Studio 12 Win64" .
|
||||||
|
|
||||||
test_script:
|
test_script:
|
||||||
- echo Build OK!
|
- echo Running Test
|
||||||
|
- cd c:\projects\OpenBLAS\utest
|
||||||
|
- openblas_utest
|
||||||
|
|
|
@ -166,7 +166,8 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
||||||
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
|
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \
|
||||||
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
|
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
|
||||||
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \
|
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \
|
||||||
ssymm.goto dsymm.goto csymm.goto zsymm.goto
|
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
|
||||||
|
smallscaling
|
||||||
|
|
||||||
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
||||||
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
|
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
|
||||||
|
@ -2132,6 +2133,8 @@ cgemm3m.$(SUFFIX) : gemm3m.c
|
||||||
zgemm3m.$(SUFFIX) : gemm3m.c
|
zgemm3m.$(SUFFIX) : gemm3m.c
|
||||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||||
|
|
||||||
|
smallscaling: smallscaling.c ../$(LIBNAME)
|
||||||
|
$(CC) $(CFLAGS) -o $(@F) $^ $(EXTRALIB) -fopenmp -lm
|
||||||
|
|
||||||
clean ::
|
clean ::
|
||||||
@rm -f *.goto *.mkl *.acml *.atlas *.veclib
|
@rm -f *.goto *.mkl *.acml *.atlas *.veclib
|
||||||
|
|
|
@ -172,7 +172,7 @@ int main(int argc, char *argv[]){
|
||||||
srandom(getpid());
|
srandom(getpid());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < to; j++){
|
||||||
for(i = 0; i < to * COMPSIZE; i++){
|
for(i = 0; i < to * COMPSIZE; i++){
|
||||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import numpy
|
||||||
|
from numpy import zeros
|
||||||
|
from numpy.random import randn
|
||||||
|
from scipy.linalg import blas
|
||||||
|
|
||||||
|
|
||||||
|
def run_dsyrk(N, l):
    """Time ``l`` consecutive double-precision SYRK calls and print the rate.

    A random N x N float64 matrix (Fortran order) is passed to
    ``blas.dsyrk`` with alpha = 1.0, writing into a preallocated N x N
    output buffer. One line is printed with the matrix size, the MFlops
    figure (computed as N**3 operations per call) and the elapsed wall-clock
    seconds. Nothing is returned.
    """
    A = randn(N, N).astype('float64', order='F')
    C = zeros((N, N), dtype='float64', order='F')

    t_begin = time.time()
    for _ in range(l):
        blas.dsyrk(1.0, A, c=C, overwrite_c=True)
    elapsed = time.time() - t_begin

    # Same arithmetic order as before: operations / seconds, then scale to mega.
    rate = (N * N * N) * l / elapsed
    rate *= 1e-6

    label = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Defaults: first size, last size, step between sizes, timed calls per size.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Positional overrides: argv[1]=N, argv[2]=NMAX, argv[3]=NINC, argv[4]=LOOPS.
    # argv[0] (the script name) and any extra arguments are ignored.
    for position, value in enumerate(sys.argv):
        if position == 1:
            N = int(value)
        elif position == 2:
            NMAX = int(value)
        elif position == 3:
            NINC = int(value)
        elif position == 4:
            LOOPS = int(value)

    # A non-empty OPENBLAS_LOOPS environment variable wins over argv[4].
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # NMAX + NINC as the exclusive bound keeps NMAX itself in the sweep
    # when it lies on the step grid.
    for size in range(N, NMAX + NINC, NINC):
        run_dsyrk(size, LOOPS)
|
|
@ -0,0 +1,58 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
import numpy
|
||||||
|
from numpy import zeros
|
||||||
|
from numpy.random import randn
|
||||||
|
from scipy.linalg import blas
|
||||||
|
|
||||||
|
|
||||||
|
def run_ssyrk(N, l):
    """Time ``l`` consecutive single-precision SYRK calls and print the rate.

    A random N x N float32 matrix (Fortran order) is passed to
    ``blas.ssyrk`` with alpha = 1.0, writing into a preallocated N x N
    output buffer. One line is printed with the matrix size, the MFlops
    figure (computed as N**3 operations per call) and the elapsed wall-clock
    seconds. Nothing is returned.
    """
    A = randn(N, N).astype('float32', order='F')
    C = zeros((N, N), dtype='float32', order='F')

    t_begin = time.time()
    for _ in range(l):
        blas.ssyrk(1.0, A, c=C, overwrite_c=True)
    elapsed = time.time() - t_begin

    # Same arithmetic order as before: operations / seconds, then scale to mega.
    rate = (N * N * N) * l / elapsed
    rate *= 1e-6

    label = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (label, rate, elapsed))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Defaults: first size, last size, step between sizes, timed calls per size.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # Positional overrides: argv[1]=N, argv[2]=NMAX, argv[3]=NINC, argv[4]=LOOPS.
    # argv[0] (the script name) and any extra arguments are ignored.
    for position, value in enumerate(sys.argv):
        if position == 1:
            N = int(value)
        elif position == 2:
            NMAX = int(value)
        elif position == 3:
            NINC = int(value)
        elif position == 4:
            LOOPS = int(value)

    # A non-empty OPENBLAS_LOOPS environment variable wins over argv[4].
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # NMAX + NINC as the exclusive bound keeps NMAX itself in the sweep
    # when it lies on the step grid.
    for size in range(N, NMAX + NINC, NINC):
        run_ssyrk(size, LOOPS)
|
|
@ -0,0 +1,196 @@
|
||||||
|
// run with OPENBLAS_NUM_THREADS=1 and OMP_NUM_THREADS=n
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <time.h>
|
||||||
|
#include <cblas.h>
|
||||||
|
#include <omp.h>
|
||||||
|
#define MIN_SIZE 5
|
||||||
|
#define MAX_SIZE 60
|
||||||
|
#define NB_SIZE 10
|
||||||
|
|
||||||
|
// Number of loops for a 1x1 matrix. Lower it if the test is
|
||||||
|
// too slow on your computer.
|
||||||
|
#define NLOOP 2e7
|
||||||
|
|
||||||
|
/*
 * Parameters for one benchmark run.
 *
 * The struct now carries a tag so that bench_func can be given a real
 * prototype: every driver in this file (trmv_bench, gemv_bench, ger_bench)
 * takes a BenchParam pointer.
 */
typedef struct BenchParam BenchParam;
struct BenchParam {
  int matrix_size;                    /* dimension handed to the BLAS routine */
  int n_loop;                         /* loop budget; drivers divide it by matrix_size */
  void (* bench_func)(BenchParam *);  /* driver that runs the timed loop */
  /* Deliberately left without a prototype: the drivers call it with
   * different argument lists (trmv-, gemv- and ger-style). */
  void (* blas_func)();
  void * (* create_matrix)(int size); /* allocates and fills an operand buffer */
};
|
||||||
|
|
||||||
|
/*
 * Allocates `size` floats and sets element i to 1e3 * i / size.
 *
 * The buffer is sized with sizeof(float); the previous sizeof(double)
 * allocated twice the memory that is ever written.
 * The caller owns the returned buffer and must free() it.
 */
void * s_create_matrix(int size) {
  float * r = malloc(size * sizeof(float));
  int i;
  for(i = 0; i < size; i++)
    r[i] = 1e3 * i / size;
  return r;
}
|
||||||
|
|
||||||
|
/*
 * Allocates 2 * size floats (presumably interleaved real/imaginary parts of
 * `size` single-precision complex values) and sets element i to
 * 1e3 * i / size.
 *
 * The buffer is sized with sizeof(float); the previous sizeof(double)
 * allocated twice the memory that is ever written.
 * The caller owns the returned buffer and must free() it.
 */
void * c_create_matrix(int size) {
  float * r = malloc(size * 2 * sizeof(float));
  int i;
  for(i = 0; i < 2 * size; i++)
    r[i] = 1e3 * i / size;
  return r;
}
|
||||||
|
|
||||||
|
/*
 * Allocates 2 * size doubles (presumably interleaved real/imaginary parts of
 * `size` double-precision complex values) and sets element k to
 * 1e3 * k / size. The caller owns the returned buffer and must free() it.
 */
void * z_create_matrix(int size) {
  const int count = 2 * size;
  double * values = malloc(size * 2 * sizeof(double));
  int k = 0;
  while (k < count) {
    values[k] = 1e3 * k / size;
    ++k;
  }
  return values;
}
|
||||||
|
|
||||||
|
/*
 * Allocates `size` doubles and sets element k to 1e3 * k / size.
 * The caller owns the returned buffer and must free() it.
 */
void * d_create_matrix(int size) {
  double * values = malloc(size * sizeof(double));
  int k = 0;
  while (k < size) {
    values[k] = 1e3 * k / size;
    ++k;
  }
  return values;
}
|
||||||
|
|
||||||
|
/*
 * TRMV-style benchmark driver.
 *
 * Builds a dim*dim matrix and a dim-element vector with
 * param->create_matrix, then calls param->blas_func
 * ("U", "N", "N", &dim, A, &dim, x, &incx) n_loop / dim times with incx = 1.
 * Both buffers are freed before returning.
 */
void trmv_bench(BenchParam * param)
{
  int dim = param->matrix_size;
  int unit_stride = 1;
  const int iterations = param->n_loop / dim;
  void * matrix = param->create_matrix(dim * dim);
  void * vector = param->create_matrix(dim);
  int done;

  for (done = 0; done < iterations; ++done) {
    param->blas_func("U", "N", "N", &dim, matrix, &dim, vector, &unit_stride);
  }

  free(matrix);
  free(vector);
}
|
||||||
|
|
||||||
|
/*
 * GEMV-style benchmark driver.
 *
 * Builds a size*size matrix and a size-element vector with
 * param->create_matrix, then calls param->blas_func with GEMV-style
 * arguments ("N", m, n, alpha, A, lda, x, incx, beta, y, incy)
 * n_loop / size times. The same vector is passed as both x and y.
 * Both buffers are freed before returning.
 */
void gemv_bench(BenchParam * param)
{
  int i, n;
  int size = param->matrix_size;
  n = param->n_loop / size;
  /* alpha/beta are passed by address. The benchmark table also runs this
   * driver with zgemv_, whose scalars are complex (two doubles), so provide
   * real and imaginary parts instead of the address of a single double.
   * Real routines only read v[0], which keeps its old value 1.01. */
  double v[2] = {1.01, 0.0};
  int one = 1;
  void * A = param->create_matrix(size * size);
  void * y = param->create_matrix(size);
  for(i = 0; i < n; i++) {
    param->blas_func("N", &size, &size, v, A, &size, y, &one, v, y, &one);
  }
  free(A);
  free(y);
}
|
||||||
|
|
||||||
|
/*
 * GER-style (rank-1 update) benchmark driver.
 *
 * Builds a size*size matrix and a size-element vector with
 * param->create_matrix, then calls param->blas_func with GER-style
 * arguments (m, n, alpha, x, incx, y, incy, A, lda) n_loop / size times.
 * The same vector is passed as both x and y.
 * Both buffers are freed before returning.
 */
void ger_bench(BenchParam * param) {
  int i, n;
  int size = param->matrix_size;
  n = param->n_loop / size;
  /* alpha is passed by address. The benchmark table also runs this driver
   * with zgerc_, whose alpha is complex (two doubles), so provide real and
   * imaginary parts instead of the address of a single double.
   * Real routines only read v[0], which keeps its old value 1.01. */
  double v[2] = {1.01, 0.0};
  int one = 1;
  void * A = param->create_matrix(size * size);
  void * y = param->create_matrix(size);
  for(i = 0; i < n; i++) {
    param->blas_func(&size, &size, v, y, &one, y, &one, A, &size);
  }
  free(A);
  free(y);
}
|
||||||
|
|
||||||
|
#ifndef _WIN32
|
||||||
|
void * pthread_func_wrapper(void * param) {
|
||||||
|
((BenchParam *)param)->bench_func(param);
|
||||||
|
pthread_exit(NULL);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Benchmark table. Each test occupies four consecutive slots, read back by
 * main() as: [0] driver assigned to bench_func, [1] routine assigned to
 * blas_func, [2] allocator assigned to create_matrix, [3] name printed in
 * the "Benchmark of ..." header. NB_TESTS is the number of such groups.
 */
#define NB_TESTS 5
|
||||||
|
void * TESTS[4 * NB_TESTS] = {
|
||||||
|
trmv_bench, ztrmv_, z_create_matrix, "ztrmv",
|
||||||
|
gemv_bench, dgemv_, d_create_matrix, "dgemv",
|
||||||
|
gemv_bench, zgemv_, z_create_matrix, "zgemv",
|
||||||
|
ger_bench, dger_, d_create_matrix, "dger",
|
||||||
|
ger_bench, zgerc_, z_create_matrix, "zgerc",
|
||||||
|
};
|
||||||
|
|
||||||
|
inline static double delta_time(struct timespec tick) {
|
||||||
|
struct timespec tock;
|
||||||
|
clock_gettime(CLOCK_MONOTONIC, &tock);
|
||||||
|
return (tock.tv_sec - tick.tv_sec) + (tock.tv_nsec - tick.tv_nsec) / 1e9;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Runs the benchmark on `nb_threads` POSIX threads and returns the elapsed
 * wall-clock seconds.
 *
 * Each thread executes pthread_func_wrapper on one shared copy of *param
 * whose n_loop has been divided by nb_threads. If a thread cannot be
 * created, the pthread_create return code is printed and the process exits
 * with status -1. On _WIN32 builds nothing is run and 0 is returned.
 */
double pthread_bench(BenchParam * param, int nb_threads)
{
#ifndef _WIN32
  struct timespec started;
  pthread_t workers[nb_threads];
  BenchParam per_thread = *param;
  int idx;

  per_thread.n_loop /= nb_threads;
  clock_gettime(CLOCK_MONOTONIC, &started);

  for (idx = 0; idx < nb_threads; ++idx) {
    const int status = pthread_create(&workers[idx], NULL, pthread_func_wrapper, &per_thread);
    if (status != 0) {
      printf("ERROR; return code from pthread_create() is %d\n", status);
      exit(-1);
    }
  }

  idx = 0;
  while (idx < nb_threads) {
    pthread_join(workers[idx], NULL);
    ++idx;
  }

  return delta_time(started);
#else
  return 0;
#endif
}
|
||||||
|
|
||||||
|
/*
 * Runs param->bench_func once on the calling thread and returns the elapsed
 * wall-clock seconds.
 */
double seq_bench(BenchParam * param) {
  struct timespec started;

  clock_gettime(CLOCK_MONOTONIC, &started);
  (*param->bench_func)(param);

  return delta_time(started);
}
|
||||||
|
|
||||||
|
/*
 * Runs the benchmark inside an OpenMP parallel-for with one iteration per
 * thread reported by omp_get_max_threads(), and returns the elapsed
 * wall-clock seconds.
 *
 * Every iteration calls param->bench_func on one shared copy of *param
 * whose n_loop has been divided by that thread count.
 */
double omp_bench(BenchParam * param) {
  const int team_size = omp_get_max_threads();
  struct timespec started;
  BenchParam share = *param;
  int worker;

  share.n_loop /= team_size;
  clock_gettime(CLOCK_MONOTONIC, &started);

  #pragma omp parallel for
  for (worker = 0; worker < team_size; ++worker) {
    param->bench_func(&share);
  }

  return delta_time(started);
}
|
||||||
|
|
||||||
|
int main(int argc, char * argv[]) {
|
||||||
|
double inc_factor = exp(log((double)MAX_SIZE / MIN_SIZE) / NB_SIZE);
|
||||||
|
BenchParam param;
|
||||||
|
int test_id;
|
||||||
|
printf ("Running on %d threads\n", omp_get_max_threads());
|
||||||
|
for(test_id = 0; test_id < NB_TESTS; test_id ++) {
|
||||||
|
double size = MIN_SIZE;
|
||||||
|
param.bench_func = TESTS[test_id * 4];
|
||||||
|
param.blas_func = TESTS[test_id * 4 + 1];
|
||||||
|
param.create_matrix = TESTS[test_id * 4 + 2];
|
||||||
|
printf("\nBenchmark of %s\n", (char*)TESTS[test_id * 4 + 3]);
|
||||||
|
param.n_loop = NLOOP;
|
||||||
|
while(size <= MAX_SIZE) {
|
||||||
|
param.matrix_size = (int)(size + 0.5);
|
||||||
|
double seq_time = seq_bench(¶m);
|
||||||
|
double omp_time = omp_bench(¶m);
|
||||||
|
double pthread_time = pthread_bench(¶m, omp_get_max_threads());
|
||||||
|
printf("matrix size %d, sequential %gs, openmp %gs, speedup %g, "
|
||||||
|
"pthread %gs, speedup %g\n",
|
||||||
|
param.matrix_size, seq_time,
|
||||||
|
omp_time, seq_time / omp_time,
|
||||||
|
pthread_time, seq_time / pthread_time);
|
||||||
|
size *= inc_factor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(0);
|
||||||
|
}
|
1
c_check
1
c_check
|
@ -6,6 +6,7 @@ $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
|
||||||
$hostarch = "x86_64" if ($hostarch eq "amd64");
|
$hostarch = "x86_64" if ($hostarch eq "amd64");
|
||||||
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
|
$hostarch = "arm" if ($hostarch =~ /^arm.*/);
|
||||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
||||||
|
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
|
||||||
|
|
||||||
$binary = $ENV{"BINARY"};
|
$binary = $ENV{"BINARY"};
|
||||||
|
|
||||||
|
|
|
@ -14,12 +14,12 @@ if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64")
|
||||||
if (NOT NO_EXPRECISION)
|
if (NOT NO_EXPRECISION)
|
||||||
if (${F_COMPILER} MATCHES "GFORTRAN")
|
if (${F_COMPILER} MATCHES "GFORTRAN")
|
||||||
# N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa
|
# N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
|
||||||
set(EXPRECISION 1)
|
set(EXPRECISION 1)
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
|
||||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
||||||
endif ()
|
endif ()
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "Clang")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||||
set(EXPRECISION 1)
|
set(EXPRECISION 1)
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
|
||||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
|
||||||
|
@ -28,35 +28,35 @@ if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "Intel")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (USE_OPENMP)
|
if (USE_OPENMP)
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "Clang")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang")
|
||||||
message(WARNING "Clang doesn't support OpenMP yet.")
|
message(WARNING "Clang doesn't support OpenMP yet.")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "Intel")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "PGI")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "OPEN64")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||||
set(CEXTRALIB "${CEXTRALIB} -lstdc++")
|
set(CEXTRALIB "${CEXTRALIB} -lstdc++")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE")
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
@ -87,7 +87,7 @@ if (${ARCH} STREQUAL "ia64")
|
||||||
set(BINARY_DEFINED 1)
|
set(BINARY_DEFINED 1)
|
||||||
|
|
||||||
if (${F_COMPILER} MATCHES "GFORTRAN")
|
if (${F_COMPILER} MATCHES "GFORTRAN")
|
||||||
if (${CMAKE_C_COMPILER} STREQUAL "GNU")
|
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
|
||||||
# EXPRECISION = 1
|
# EXPRECISION = 1
|
||||||
# CCOMMON_OPT += -DEXPRECISION
|
# CCOMMON_OPT += -DEXPRECISION
|
||||||
endif ()
|
endif ()
|
||||||
|
|
|
@ -48,18 +48,18 @@ set(SLASRC
|
||||||
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
|
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
|
||||||
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
|
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
|
||||||
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
|
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
|
||||||
sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
|
DEPRECATED/sgegs.f DEPRECATED/sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
|
||||||
sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
|
sgels.f sgelsd.f sgelss.f DEPRECATED/sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
|
||||||
sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
|
sgeqp3.f DEPRECATED/sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
|
||||||
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f
|
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f
|
||||||
sgetc2.f sgetri.f
|
sgetc2.f sgetri.f
|
||||||
sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f
|
sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f
|
||||||
sggglm.f sgghrd.f sgglse.f sggqrf.f
|
sggglm.f sgghrd.f sgglse.f sggqrf.f
|
||||||
sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f
|
sggrqf.f DEPRECATED/sggsvd.f DEPRECATED/sggsvp.f sgtcon.f sgtrfs.f sgtsv.f
|
||||||
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
|
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
|
||||||
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
|
shsein.f shseqr.f slabrd.f slacon.f slacn2.f
|
||||||
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
|
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
|
||||||
slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
|
DEPRECATED/slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
|
||||||
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
|
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
|
||||||
slansy.f slantb.f slantp.f slantr.f slanv2.f
|
slansy.f slantb.f slantp.f slantr.f slanv2.f
|
||||||
slapll.f slapmt.f
|
slapll.f slapmt.f
|
||||||
|
@ -69,7 +69,7 @@ set(SLASRC
|
||||||
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
|
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
|
||||||
slarrv.f slartv.f
|
slarrv.f slartv.f
|
||||||
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
|
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
|
||||||
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f
|
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f DEPRECATED/slatzm.f
|
||||||
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
|
sopgtr.f sopmtr.f sorg2l.f sorg2r.f
|
||||||
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
|
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
|
||||||
sorgrq.f sorgtr.f sorm2l.f sorm2r.f
|
sorgrq.f sorgtr.f sorm2l.f sorm2r.f
|
||||||
|
@ -97,7 +97,7 @@ set(SLASRC
|
||||||
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
|
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
|
||||||
stptrs.f
|
stptrs.f
|
||||||
strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
|
strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
|
||||||
strtrs.f stzrqf.f stzrzf.f sstemr.f
|
strtrs.f DEPRECATED/stzrqf.f stzrzf.f sstemr.f
|
||||||
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
|
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
|
||||||
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
|
stfttr.f stpttf.f stpttr.f strttf.f strttp.f
|
||||||
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
|
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
|
||||||
|
@ -114,14 +114,14 @@ set(CLASRC
|
||||||
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
|
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
|
||||||
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
|
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
|
||||||
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
|
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
|
||||||
cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
|
DEPRECATED/cgegs.f DEPRECATED/cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
|
||||||
cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
|
cgels.f cgelsd.f cgelss.f DEPRECATED/cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
|
||||||
cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
|
DEPRECATED/cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
|
||||||
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
|
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
|
||||||
cgesvx.f cgetc2.f cgetri.f
|
cgesvx.f cgetc2.f cgetri.f
|
||||||
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
|
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
|
||||||
cgghrd.f cgglse.f cggqrf.f cggrqf.f
|
cgghrd.f cgglse.f cggqrf.f cggrqf.f
|
||||||
cggsvd.f cggsvp.f
|
DEPRECATED/cggsvd.f DEPRECATED/cggsvp.f
|
||||||
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
|
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
|
||||||
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
|
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
|
||||||
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
|
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
|
||||||
|
@ -138,7 +138,7 @@ set(CLASRC
|
||||||
claed0.f claed7.f claed8.f
|
claed0.f claed7.f claed8.f
|
||||||
claein.f claesy.f claev2.f clags2.f clagtm.f
|
claein.f claesy.f claev2.f clags2.f clagtm.f
|
||||||
clahef.f clahef_rook.f clahqr.f
|
clahef.f clahef_rook.f clahqr.f
|
||||||
clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
|
DEPRECATED/clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
|
||||||
clanhb.f clanhe.f
|
clanhb.f clanhe.f
|
||||||
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
|
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
|
||||||
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
|
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
|
||||||
|
@ -149,7 +149,7 @@ set(CLASRC
|
||||||
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
|
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
|
||||||
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
|
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
|
||||||
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
|
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
|
||||||
clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
|
DEPRECATED/clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
|
||||||
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
|
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
|
||||||
cposv.f cposvx.f cpstrf.f cpstf2.f
|
cposv.f cposvx.f cpstrf.f cpstf2.f
|
||||||
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
|
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
|
||||||
|
@ -166,7 +166,7 @@ set(CLASRC
|
||||||
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
|
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
|
||||||
ctprfs.f ctptri.f
|
ctprfs.f ctptri.f
|
||||||
ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
|
ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
|
||||||
ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f
|
ctrsyl.f ctrtrs.f DEPRECATED/ctzrqf.f ctzrzf.f cung2l.f cung2r.f
|
||||||
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
|
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
|
||||||
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
|
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
|
||||||
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
|
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
|
||||||
|
@ -186,18 +186,18 @@ set(DLASRC
|
||||||
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
|
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
|
||||||
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
|
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
|
||||||
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
|
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
|
||||||
dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
|
DEPRECATED/dgegs.f DEPRECATED/dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
|
||||||
dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
|
dgels.f dgelsd.f dgelss.f DEPRECATED/dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
|
||||||
dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
|
dgeqp3.f DEPRECATED/dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
|
||||||
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f
|
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f
|
||||||
dgetc2.f dgetri.f
|
dgetc2.f dgetri.f
|
||||||
dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f
|
dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f
|
||||||
dggglm.f dgghrd.f dgglse.f dggqrf.f
|
dggglm.f dgghrd.f dgglse.f dggqrf.f
|
||||||
dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
|
dggrqf.f DEPRECATED/dggsvd.f DEPRECATED/dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
|
||||||
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
|
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
|
||||||
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
|
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
|
||||||
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
|
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
|
||||||
dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
|
DEPRECATED/dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
|
||||||
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
|
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
|
||||||
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
|
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
|
||||||
dlapll.f dlapmt.f
|
dlapll.f dlapmt.f
|
||||||
|
@ -207,7 +207,7 @@ set(DLASRC
|
||||||
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
|
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
|
||||||
dlargv.f dlarrv.f dlartv.f
|
dlargv.f dlarrv.f dlartv.f
|
||||||
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
|
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
|
||||||
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f
|
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f DEPRECATED/dlatzm.f
|
||||||
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
|
dopgtr.f dopmtr.f dorg2l.f dorg2r.f
|
||||||
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
|
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
|
||||||
dorgrq.f dorgtr.f dorm2l.f dorm2r.f
|
dorgrq.f dorgtr.f dorm2l.f dorm2r.f
|
||||||
|
@ -235,7 +235,7 @@ set(DLASRC
|
||||||
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
|
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
|
||||||
dtptrs.f
|
dtptrs.f
|
||||||
dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
|
dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
|
||||||
dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f
|
dtrtrs.f DEPRECATED/dtzrqf.f dtzrzf.f dstemr.f
|
||||||
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
|
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
|
||||||
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
|
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
|
||||||
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
|
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
|
||||||
|
@ -251,14 +251,14 @@ set(ZLASRC
|
||||||
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
|
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
|
||||||
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
|
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
|
||||||
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
|
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
|
||||||
zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f
|
DEPRECATED/zgegs.f DEPRECATED/zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f
|
||||||
zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
|
zgels.f zgelsd.f zgelss.f DEPRECATED/zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
|
||||||
zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
|
DEPRECATED/zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
|
||||||
zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f
|
zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f
|
||||||
zgetri.f
|
zgetri.f
|
||||||
zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f
|
zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f
|
||||||
zgghrd.f zgglse.f zggqrf.f zggrqf.f
|
zgghrd.f zgglse.f zggqrf.f zggrqf.f
|
||||||
zggsvd.f zggsvp.f
|
DEPRECATED/zggsvd.f DEPRECATED/zggsvp.f
|
||||||
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
|
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
|
||||||
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
|
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
|
||||||
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
|
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
|
||||||
|
@ -275,7 +275,7 @@ set(ZLASRC
|
||||||
zlaed0.f zlaed7.f zlaed8.f
|
zlaed0.f zlaed7.f zlaed8.f
|
||||||
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
|
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
|
||||||
zlahef.f zlahef_rook.f zlahqr.f
|
zlahef.f zlahef_rook.f zlahqr.f
|
||||||
zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
|
DEPRECATED/zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
|
||||||
zlangt.f zlanhb.f
|
zlangt.f zlanhb.f
|
||||||
zlanhe.f
|
zlanhe.f
|
||||||
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
|
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
|
||||||
|
@ -288,7 +288,7 @@ set(ZLASRC
|
||||||
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
|
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
|
||||||
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
|
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
|
||||||
zlassq.f zlasyf.f zlasyf_rook.f
|
zlassq.f zlasyf.f zlasyf_rook.f
|
||||||
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f
|
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f DEPRECATED/zlatzm.f
|
||||||
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
|
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
|
||||||
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
|
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
|
||||||
zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f
|
zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f
|
||||||
|
@ -306,7 +306,7 @@ set(ZLASRC
|
||||||
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
|
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
|
||||||
ztprfs.f ztptri.f
|
ztprfs.f ztptri.f
|
||||||
ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
|
ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
|
||||||
ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f
|
ztrsyl.f ztrtrs.f DEPRECATED/ztzrqf.f ztzrzf.f zung2l.f
|
||||||
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
|
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
|
||||||
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f
|
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f
|
||||||
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
|
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
|
||||||
|
|
|
@ -2038,6 +2038,59 @@ set(MATGEN
|
||||||
lapacke_zlagsy_work.c
|
lapacke_zlagsy_work.c
|
||||||
)
|
)
|
||||||
|
|
||||||
|
set(Utils_SRC
|
||||||
|
lapacke_cgb_nancheck.c lapacke_dpf_nancheck.c lapacke_ssy_trans.c
|
||||||
|
lapacke_cgb_trans.c lapacke_dpf_trans.c lapacke_stb_nancheck.c
|
||||||
|
lapacke_cge_nancheck.c lapacke_dpo_nancheck.c lapacke_stb_trans.c
|
||||||
|
lapacke_cge_trans.c lapacke_dpo_trans.c lapacke_stf_nancheck.c
|
||||||
|
lapacke_cgg_nancheck.c lapacke_dpp_nancheck.c lapacke_stf_trans.c
|
||||||
|
lapacke_cgg_trans.c lapacke_dpp_trans.c lapacke_stp_nancheck.c
|
||||||
|
lapacke_cgt_nancheck.c lapacke_dpt_nancheck.c lapacke_stp_trans.c
|
||||||
|
lapacke_chb_nancheck.c lapacke_dsb_nancheck.c lapacke_str_nancheck.c
|
||||||
|
lapacke_chb_trans.c lapacke_dsb_trans.c lapacke_str_trans.c
|
||||||
|
lapacke_che_nancheck.c lapacke_dsp_nancheck.c lapacke_xerbla.c
|
||||||
|
lapacke_che_trans.c lapacke_dsp_trans.c lapacke_zgb_nancheck.c
|
||||||
|
lapacke_chp_nancheck.c lapacke_dst_nancheck.c lapacke_zgb_trans.c
|
||||||
|
lapacke_chp_trans.c lapacke_dsy_nancheck.c lapacke_zge_nancheck.c
|
||||||
|
lapacke_chs_nancheck.c lapacke_dsy_trans.c lapacke_zge_trans.c
|
||||||
|
lapacke_chs_trans.c lapacke_dtb_nancheck.c lapacke_zgg_nancheck.c
|
||||||
|
lapacke_c_nancheck.c lapacke_dtb_trans.c lapacke_zgg_trans.c
|
||||||
|
lapacke_cpb_nancheck.c lapacke_dtf_nancheck.c lapacke_zgt_nancheck.c
|
||||||
|
lapacke_cpb_trans.c lapacke_dtf_trans.c lapacke_zhb_nancheck.c
|
||||||
|
lapacke_cpf_nancheck.c lapacke_dtp_nancheck.c lapacke_zhb_trans.c
|
||||||
|
lapacke_cpf_trans.c lapacke_dtp_trans.c lapacke_zhe_nancheck.c
|
||||||
|
lapacke_cpo_nancheck.c lapacke_dtr_nancheck.c lapacke_zhe_trans.c
|
||||||
|
lapacke_cpo_trans.c lapacke_dtr_trans.c lapacke_zhp_nancheck.c
|
||||||
|
lapacke_cpp_nancheck.c lapacke_lsame.c lapacke_zhp_trans.c
|
||||||
|
lapacke_cpp_trans.c lapacke_make_complex_double.c lapacke_zhs_nancheck.c
|
||||||
|
lapacke_cpt_nancheck.c lapacke_make_complex_float.c lapacke_zhs_trans.c
|
||||||
|
lapacke_csp_nancheck.c lapacke_sgb_nancheck.c lapacke_z_nancheck.c
|
||||||
|
lapacke_csp_trans.c lapacke_sgb_trans.c lapacke_zpb_nancheck.c
|
||||||
|
lapacke_cst_nancheck.c lapacke_sge_nancheck.c lapacke_zpb_trans.c
|
||||||
|
lapacke_csy_nancheck.c lapacke_sge_trans.c lapacke_zpf_nancheck.c
|
||||||
|
lapacke_csy_trans.c lapacke_sgg_nancheck.c lapacke_zpf_trans.c
|
||||||
|
lapacke_ctb_nancheck.c lapacke_sgg_trans.c lapacke_zpo_nancheck.c
|
||||||
|
lapacke_ctb_trans.c lapacke_sgt_nancheck.c lapacke_zpo_trans.c
|
||||||
|
lapacke_ctf_nancheck.c lapacke_shs_nancheck.c lapacke_zpp_nancheck.c
|
||||||
|
lapacke_ctf_trans.c lapacke_shs_trans.c lapacke_zpp_trans.c
|
||||||
|
lapacke_ctp_nancheck.c lapacke_s_nancheck.c lapacke_zpt_nancheck.c
|
||||||
|
lapacke_ctp_trans.c lapacke_spb_nancheck.c lapacke_zsp_nancheck.c
|
||||||
|
lapacke_ctr_nancheck.c lapacke_spb_trans.c lapacke_zsp_trans.c
|
||||||
|
lapacke_ctr_trans.c lapacke_spf_nancheck.c lapacke_zst_nancheck.c
|
||||||
|
lapacke_dgb_nancheck.c lapacke_spf_trans.c lapacke_zsy_nancheck.c
|
||||||
|
lapacke_dgb_trans.c lapacke_spo_nancheck.c lapacke_zsy_trans.c
|
||||||
|
lapacke_dge_nancheck.c lapacke_spo_trans.c lapacke_ztb_nancheck.c
|
||||||
|
lapacke_dge_trans.c lapacke_spp_nancheck.c lapacke_ztb_trans.c
|
||||||
|
lapacke_dgg_nancheck.c lapacke_spp_trans.c lapacke_ztf_nancheck.c
|
||||||
|
lapacke_dgg_trans.c lapacke_spt_nancheck.c lapacke_ztf_trans.c
|
||||||
|
lapacke_dgt_nancheck.c lapacke_ssb_nancheck.c lapacke_ztp_nancheck.c
|
||||||
|
lapacke_dhs_nancheck.c lapacke_ssb_trans.c lapacke_ztp_trans.c
|
||||||
|
lapacke_dhs_trans.c lapacke_ssp_nancheck.c lapacke_ztr_nancheck.c
|
||||||
|
lapacke_d_nancheck.c lapacke_ssp_trans.c lapacke_ztr_trans.c
|
||||||
|
lapacke_dpb_nancheck.c lapacke_sst_nancheck.c
|
||||||
|
lapacke_dpb_trans.c lapacke_ssy_nancheck.c
|
||||||
|
)
|
||||||
|
|
||||||
set(LAPACKE_REL_SRC "")
|
set(LAPACKE_REL_SRC "")
|
||||||
if (BUILD_SINGLE)
|
if (BUILD_SINGLE)
|
||||||
list(APPEND LAPACKE_REL_SRC ${SSRC})
|
list(APPEND LAPACKE_REL_SRC ${SSRC})
|
||||||
|
@ -2058,10 +2111,14 @@ endif ()
|
||||||
# add lapack-netlib folder to the sources
|
# add lapack-netlib folder to the sources
|
||||||
set(LAPACKE_SOURCES "")
|
set(LAPACKE_SOURCES "")
|
||||||
foreach (LAE_FILE ${LAPACKE_REL_SRC})
|
foreach (LAE_FILE ${LAPACKE_REL_SRC})
|
||||||
list(APPEND LAPACKE_SOURCES "${NETLIB_LAPACK_DIR}/lapacke/src/${LAE_FILE}")
|
list(APPEND LAPACKE_SOURCES "${NETLIB_LAPACK_DIR}/LAPACKE/src/${LAE_FILE}")
|
||||||
endforeach ()
|
endforeach ()
|
||||||
|
|
||||||
set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/lapacke/include")
|
foreach (Utils_FILE ${Utils_SRC})
|
||||||
|
list(APPEND LAPACKE_SOURCES "${NETLIB_LAPACK_DIR}/LAPACKE/utils/${Utils_FILE}")
|
||||||
|
endforeach ()
|
||||||
|
|
||||||
|
set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/LAPACKE/include")
|
||||||
execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${lapacke_include_dir}/lapacke_mangling_with_flags.h" "${lapacke_include_dir}/lapacke_mangling.h")
|
execute_process(COMMAND ${CMAKE_COMMAND} -E copy "${lapacke_include_dir}/lapacke_mangling_with_flags.h" "${lapacke_include_dir}/lapacke_mangling.h")
|
||||||
include_directories(${lapacke_include_dir})
|
include_directories(${lapacke_include_dir})
|
||||||
set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}")
|
set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}")
|
||||||
|
|
9
common.h
9
common.h
|
@ -86,13 +86,14 @@ extern "C" {
|
||||||
#if !defined(_MSC_VER)
|
#if !defined(_MSC_VER)
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
#ifdef OS_LINUX
|
#ifdef OS_LINUX
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD)
|
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_ANDROID)
|
||||||
#include <sched.h>
|
#include <sched.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -331,12 +332,13 @@ typedef int blasint;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
#ifdef PILEDRIVER
|
#ifdef PILEDRIVER
|
||||||
#ifndef YIELDING
|
#ifndef YIELDING
|
||||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#ifdef STEAMROLLER
|
#ifdef STEAMROLLER
|
||||||
|
@ -410,7 +412,7 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
#ifdef OS_WINDOWS
|
#ifdef OS_WINDOWS
|
||||||
typedef char env_var_t[MAX_PATH];
|
typedef char env_var_t[MAX_PATH];
|
||||||
#define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p))
|
#define readenv(p, n) GetEnvironmentVariable((LPCTSTR)(n), (LPTSTR)(p), sizeof(p))
|
||||||
#else
|
#else
|
||||||
typedef char* env_var_t;
|
typedef char* env_var_t;
|
||||||
#define readenv(p, n) ((p)=getenv(n))
|
#define readenv(p, n) ((p)=getenv(n))
|
||||||
|
@ -726,6 +728,7 @@ typedef struct {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
|
#include "common_stackalloc.h"
|
||||||
#if 0
|
#if 0
|
||||||
#include "symcopy.h"
|
#include "symcopy.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -43,28 +43,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
|
|
||||||
|
|
||||||
static void __inline blas_lock(volatile BLASULONG *address){
|
static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
|
|
||||||
long register ret;
|
BLASULONG ret;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
while (*address) {YIELDING;};
|
while (*address) {YIELDING;};
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
"ldaxr %0, [%1] \n\t"
|
"mov x4, #1 \n\t"
|
||||||
"stlxr w2, %2, [%1] \n\t"
|
"1: \n\t"
|
||||||
"orr %0, %0, x2 \n\t"
|
"ldaxr x2, [%1] \n\t"
|
||||||
: "=r"(ret)
|
"cbnz x2, 1b \n\t"
|
||||||
: "r"(address), "r"(1l)
|
"2: \n\t"
|
||||||
: "memory", "x2"
|
"stxr w3, x4, [%1] \n\t"
|
||||||
|
"cbnz w3, 1b \n\t"
|
||||||
|
"mov %0, #0 \n\t"
|
||||||
|
: "=r"(ret), "=r"(address)
|
||||||
|
: "1"(address)
|
||||||
|
: "memory", "x2" , "x3", "x4"
|
||||||
|
|
||||||
|
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
} while (ret);
|
} while (ret);
|
||||||
MB;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define BLAS_LOCK_DEFINED
|
#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static inline int blas_quickdivide(blasint x, blasint y){
|
static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
return x / y;
|
return x / y;
|
||||||
}
|
}
|
||||||
|
@ -89,8 +100,10 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
#if defined(ASSEMBLER) && !defined(NEEDPARAM)
|
||||||
|
|
||||||
#define PROLOGUE \
|
#define PROLOGUE \
|
||||||
|
.text ;\
|
||||||
|
.align 4 ;\
|
||||||
.global REALNAME ;\
|
.global REALNAME ;\
|
||||||
.func REALNAME ;\
|
.type REALNAME, %function ;\
|
||||||
REALNAME:
|
REALNAME:
|
||||||
|
|
||||||
#define EPILOGUE
|
#define EPILOGUE
|
||||||
|
@ -107,7 +120,11 @@ REALNAME:
|
||||||
#endif
|
#endif
|
||||||
#define HUGE_PAGESIZE ( 4 << 20)
|
#define HUGE_PAGESIZE ( 4 << 20)
|
||||||
|
|
||||||
|
#if defined(CORTEXA57)
|
||||||
|
#define BUFFER_SIZE (20 << 20)
|
||||||
|
#else
|
||||||
#define BUFFER_SIZE (16 << 20)
|
#define BUFFER_SIZE (16 << 20)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)
|
||||||
|
|
|
@ -236,7 +236,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
#define HAVE_PREFETCH
|
#define HAVE_PREFETCH
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL)
|
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8)
|
||||||
#define DCBT_ARG 0
|
#define DCBT_ARG 0
|
||||||
#else
|
#else
|
||||||
#define DCBT_ARG 8
|
#define DCBT_ARG 8
|
||||||
|
@ -258,6 +258,13 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
#define L1_PREFETCH dcbtst
|
#define L1_PREFETCH dcbtst
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(POWER8)
|
||||||
|
#define L1_DUALFETCH
|
||||||
|
#define L1_PREFETCHSIZE (16 + 128 * 100)
|
||||||
|
#define L1_PREFETCH dcbtst
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#
|
||||||
#ifndef L1_PREFETCH
|
#ifndef L1_PREFETCH
|
||||||
#define L1_PREFETCH dcbt
|
#define L1_PREFETCH dcbt
|
||||||
#endif
|
#endif
|
||||||
|
@ -790,6 +797,8 @@ Lmcount$lazy_ptr:
|
||||||
#define BUFFER_SIZE ( 2 << 20)
|
#define BUFFER_SIZE ( 2 << 20)
|
||||||
#elif defined(PPC440FP2)
|
#elif defined(PPC440FP2)
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
|
#elif defined(POWER8)
|
||||||
|
#define BUFFER_SIZE ( 64 << 20)
|
||||||
#else
|
#else
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
Copyright (c) 2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#define STACK_ALLOC_PROTECT
|
||||||
|
#ifdef STACK_ALLOC_PROTECT
|
||||||
|
// Try to detect stack smashing
|
||||||
|
#include <assert.h>
|
||||||
|
#define STACK_ALLOC_PROTECT_SET volatile int stack_check = 0x7fc01234;
|
||||||
|
#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7fc01234);
|
||||||
|
#else
|
||||||
|
#define STACK_ALLOC_PROTECT_SET
|
||||||
|
#define STACK_ALLOC_PROTECT_CHECK
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate a buffer on the stack if the size is smaller than MAX_STACK_ALLOC.
|
||||||
|
* Stack allocation is much faster than blas_memory_alloc or malloc, particularly
|
||||||
|
* when OpenBLAS is used from a multi-threaded application.
|
||||||
|
* SIZE must be carefully chosen to be:
|
||||||
|
* - as small as possible to maximize the number of stack allocation
|
||||||
|
* - large enough to support all architectures and kernel
|
||||||
|
* Chosing a too small SIZE will lead to a stack smashing.
|
||||||
|
*/
|
||||||
|
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
|
||||||
|
/* make it volatile because some function (ex: dgemv_n.S) */ \
|
||||||
|
/* do not restore all register */ \
|
||||||
|
volatile int stack_alloc_size = SIZE; \
|
||||||
|
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE)) \
|
||||||
|
stack_alloc_size = 0; \
|
||||||
|
STACK_ALLOC_PROTECT_SET \
|
||||||
|
TYPE stack_buffer[stack_alloc_size] __attribute__((aligned(0x20))); \
|
||||||
|
BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);
|
||||||
|
#else
|
||||||
|
//Original OpenBLAS/GotoBLAS codes.
|
||||||
|
#define STACK_ALLOC(SIZE, TYPE, BUFFER) BUFFER = (TYPE *)blas_memory_alloc(1)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
|
||||||
|
#define STACK_FREE(BUFFER) \
|
||||||
|
STACK_ALLOC_PROTECT_CHECK \
|
||||||
|
if(!stack_alloc_size) \
|
||||||
|
blas_memory_free(BUFFER);
|
||||||
|
#else
|
||||||
|
#define STACK_FREE(BUFFER) blas_memory_free(BUFFER)
|
||||||
|
#endif
|
||||||
|
|
13
common_x86.h
13
common_x86.h
|
@ -41,6 +41,10 @@
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
|
|
||||||
|
#ifdef C_MSVC
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#define MB
|
#define MB
|
||||||
#define WMB
|
#define WMB
|
||||||
|
|
||||||
|
@ -170,12 +174,13 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||||
|
|
||||||
if (y <= 1) return x;
|
if (y <= 1) return x;
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) && !defined(__clang__)
|
||||||
|
result = x/y;
|
||||||
|
return result;
|
||||||
|
#else
|
||||||
|
|
||||||
y = blas_quick_divide_table[y];
|
y = blas_quick_divide_table[y];
|
||||||
|
|
||||||
#if defined(_MSC_VER) && !defined(__clang__)
|
|
||||||
(void*)result;
|
|
||||||
return x*y;
|
|
||||||
#else
|
|
||||||
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
|
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y));
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
|
|
@ -396,7 +396,7 @@ REALNAME:
|
||||||
|
|
||||||
#define PROFCODE
|
#define PROFCODE
|
||||||
|
|
||||||
#define EPILOGUE .end REALNAME
|
#define EPILOGUE .end
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI)
|
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI)
|
||||||
|
|
24
cpuid_arm.c
24
cpuid_arm.c
|
@ -115,6 +115,9 @@ int detect(void)
|
||||||
if (strstr(p, "0xc0f")) {
|
if (strstr(p, "0xc0f")) {
|
||||||
return CPU_CORTEXA15;
|
return CPU_CORTEXA15;
|
||||||
}
|
}
|
||||||
|
if (strstr(p, "0xd07")) {
|
||||||
|
return CPU_ARMV7; //ARMV8 on 32-bit
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -158,6 +161,27 @@ int detect(void)
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
p = (char *) NULL ;
|
||||||
|
infile = fopen("/proc/cpuinfo", "r");
|
||||||
|
|
||||||
|
while (fgets(buffer, sizeof(buffer), infile))
|
||||||
|
{
|
||||||
|
|
||||||
|
if ((!strncmp("CPU architecture", buffer, 16)))
|
||||||
|
{
|
||||||
|
p = strchr(buffer, ':') + 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fclose(infile);
|
||||||
|
if(p != NULL) {
|
||||||
|
if (strstr(p, "8")) {
|
||||||
|
return CPU_ARMV7; //ARMV8 on 32-bit
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return CPU_UNKNOWN;
|
return CPU_UNKNOWN;
|
||||||
|
|
|
@ -29,12 +29,19 @@
|
||||||
|
|
||||||
#define CPU_UNKNOWN 0
|
#define CPU_UNKNOWN 0
|
||||||
#define CPU_ARMV8 1
|
#define CPU_ARMV8 1
|
||||||
|
#define CPU_CORTEXA57 2
|
||||||
|
|
||||||
static char *cpuname[] = {
|
static char *cpuname[] = {
|
||||||
"UNKOWN",
|
"UNKNOWN",
|
||||||
"ARMV8"
|
"ARMV8" ,
|
||||||
|
"CORTEXA57"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static char *cpuname_lower[] = {
|
||||||
|
"unknown",
|
||||||
|
"armv8" ,
|
||||||
|
"cortexa57"
|
||||||
|
};
|
||||||
|
|
||||||
int get_feature(char *search)
|
int get_feature(char *search)
|
||||||
{
|
{
|
||||||
|
@ -53,13 +60,13 @@ int get_feature(char *search)
|
||||||
{
|
{
|
||||||
p = strchr(buffer, ':') + 2;
|
p = strchr(buffer, ':') + 2;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose(infile);
|
fclose(infile);
|
||||||
|
|
||||||
|
|
||||||
if( p == NULL ) return;
|
if( p == NULL ) return 0;
|
||||||
|
|
||||||
t = strtok(p," ");
|
t = strtok(p," ");
|
||||||
while( t = strtok(NULL," "))
|
while( t = strtok(NULL," "))
|
||||||
|
@ -82,11 +89,30 @@ int detect(void)
|
||||||
p = (char *) NULL ;
|
p = (char *) NULL ;
|
||||||
|
|
||||||
infile = fopen("/proc/cpuinfo", "r");
|
infile = fopen("/proc/cpuinfo", "r");
|
||||||
|
|
||||||
while (fgets(buffer, sizeof(buffer), infile))
|
while (fgets(buffer, sizeof(buffer), infile))
|
||||||
{
|
{
|
||||||
|
|
||||||
if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)))
|
if (!strncmp("CPU part", buffer, 8))
|
||||||
|
{
|
||||||
|
p = strchr(buffer, ':') + 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(infile);
|
||||||
|
if(p != NULL) {
|
||||||
|
if (strstr(p, "0xd07")) {
|
||||||
|
return CPU_CORTEXA57;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
p = (char *) NULL ;
|
||||||
|
infile = fopen("/proc/cpuinfo", "r");
|
||||||
|
while (fgets(buffer, sizeof(buffer), infile))
|
||||||
|
{
|
||||||
|
|
||||||
|
if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9)) ||
|
||||||
|
(!strncmp("CPU architecture", buffer, 16)))
|
||||||
{
|
{
|
||||||
p = strchr(buffer, ':') + 2;
|
p = strchr(buffer, ':') + 2;
|
||||||
break;
|
break;
|
||||||
|
@ -100,7 +126,7 @@ int detect(void)
|
||||||
|
|
||||||
if (strstr(p, "AArch64"))
|
if (strstr(p, "AArch64"))
|
||||||
{
|
{
|
||||||
return CPU_ARMV8;
|
return CPU_ARMV8;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -118,23 +144,13 @@ char *get_corename(void)
|
||||||
|
|
||||||
void get_architecture(void)
|
void get_architecture(void)
|
||||||
{
|
{
|
||||||
printf("ARM");
|
printf("ARM64");
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_subarchitecture(void)
|
void get_subarchitecture(void)
|
||||||
{
|
{
|
||||||
int d = detect();
|
int d = detect();
|
||||||
switch (d)
|
printf("%s", cpuname[d]);
|
||||||
{
|
|
||||||
|
|
||||||
case CPU_ARMV8:
|
|
||||||
printf("ARMV8");
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
printf("UNKNOWN");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_subdirname(void)
|
void get_subdirname(void)
|
||||||
|
@ -160,26 +176,34 @@ void get_cpuconfig(void)
|
||||||
printf("#define L2_ASSOCIATIVE 4\n");
|
printf("#define L2_ASSOCIATIVE 4\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case CPU_CORTEXA57:
|
||||||
|
printf("#define CORTEXA57\n");
|
||||||
|
printf("#define HAVE_VFP\n");
|
||||||
|
printf("#define HAVE_VFPV3\n");
|
||||||
|
printf("#define HAVE_NEON\n");
|
||||||
|
printf("#define HAVE_VFPV4\n");
|
||||||
|
printf("#define L1_CODE_SIZE 49152\n");
|
||||||
|
printf("#define L1_CODE_LINESIZE 64\n");
|
||||||
|
printf("#define L1_CODE_ASSOCIATIVE 3\n");
|
||||||
|
printf("#define L1_DATA_SIZE 32768\n");
|
||||||
|
printf("#define L1_DATA_LINESIZE 64\n");
|
||||||
|
printf("#define L1_DATA_ASSOCIATIVE 2\n");
|
||||||
|
printf("#define L2_SIZE 2097152\n");
|
||||||
|
printf("#define L2_LINESIZE 64\n");
|
||||||
|
printf("#define L2_ASSOCIATIVE 16\n");
|
||||||
|
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||||
|
printf("#define DTB_SIZE 4096\n");
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void get_libname(void)
|
void get_libname(void)
|
||||||
{
|
{
|
||||||
|
|
||||||
int d = detect();
|
int d = detect();
|
||||||
switch (d)
|
printf("%s", cpuname_lower[d]);
|
||||||
{
|
|
||||||
|
|
||||||
case CPU_ARMV8:
|
|
||||||
printf("armv8\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void get_features(void)
|
void get_features(void)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
|
|
@ -55,6 +55,7 @@
|
||||||
#define CPUTYPE_POWER6 5
|
#define CPUTYPE_POWER6 5
|
||||||
#define CPUTYPE_CELL 6
|
#define CPUTYPE_CELL 6
|
||||||
#define CPUTYPE_PPCG4 7
|
#define CPUTYPE_PPCG4 7
|
||||||
|
#define CPUTYPE_POWER8 8
|
||||||
|
|
||||||
char *cpuname[] = {
|
char *cpuname[] = {
|
||||||
"UNKNOWN",
|
"UNKNOWN",
|
||||||
|
@ -65,6 +66,7 @@ char *cpuname[] = {
|
||||||
"POWER6",
|
"POWER6",
|
||||||
"CELL",
|
"CELL",
|
||||||
"PPCG4",
|
"PPCG4",
|
||||||
|
"POWER8"
|
||||||
};
|
};
|
||||||
|
|
||||||
char *lowercpuname[] = {
|
char *lowercpuname[] = {
|
||||||
|
@ -76,6 +78,7 @@ char *lowercpuname[] = {
|
||||||
"power6",
|
"power6",
|
||||||
"cell",
|
"cell",
|
||||||
"ppcg4",
|
"ppcg4",
|
||||||
|
"power8"
|
||||||
};
|
};
|
||||||
|
|
||||||
char *corename[] = {
|
char *corename[] = {
|
||||||
|
@ -87,6 +90,7 @@ char *corename[] = {
|
||||||
"POWER6",
|
"POWER6",
|
||||||
"CELL",
|
"CELL",
|
||||||
"PPCG4",
|
"PPCG4",
|
||||||
|
"POWER8"
|
||||||
};
|
};
|
||||||
|
|
||||||
int detect(void){
|
int detect(void){
|
||||||
|
@ -115,7 +119,7 @@ int detect(void){
|
||||||
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
|
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
|
||||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
||||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER6;
|
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
|
||||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||||
|
|
||||||
|
|
24
cpuid_x86.c
24
cpuid_x86.c
|
@ -1172,6 +1172,9 @@ int get_cpuname(void){
|
||||||
#endif
|
#endif
|
||||||
else
|
else
|
||||||
return CPUTYPE_NEHALEM;
|
return CPUTYPE_NEHALEM;
|
||||||
|
case 13:
|
||||||
|
// Avoton
|
||||||
|
return CPUTYPE_NEHALEM;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
|
@ -1229,6 +1232,7 @@ int get_cpuname(void){
|
||||||
case 2:
|
case 2:
|
||||||
return CPUTYPE_OPTERON;
|
return CPUTYPE_OPTERON;
|
||||||
case 1:
|
case 1:
|
||||||
|
case 3:
|
||||||
case 10:
|
case 10:
|
||||||
return CPUTYPE_BARCELONA;
|
return CPUTYPE_BARCELONA;
|
||||||
case 6:
|
case 6:
|
||||||
|
@ -1239,13 +1243,19 @@ int get_cpuname(void){
|
||||||
return CPUTYPE_BULLDOZER;
|
return CPUTYPE_BULLDOZER;
|
||||||
else
|
else
|
||||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||||
case 2:
|
case 2: //AMD Piledriver
|
||||||
|
case 3: //AMD Richland
|
||||||
if(support_avx())
|
if(support_avx())
|
||||||
return CPUTYPE_PILEDRIVER;
|
return CPUTYPE_PILEDRIVER;
|
||||||
else
|
else
|
||||||
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||||
case 0:
|
case 0:
|
||||||
switch(exmodel){
|
switch(exmodel){
|
||||||
|
case 1: //AMD Trinity
|
||||||
|
if(support_avx())
|
||||||
|
return CPUTYPE_PILEDRIVER;
|
||||||
|
else
|
||||||
|
return CPUTYPE_BARCELONA; //OS don't support AVX.
|
||||||
case 3:
|
case 3:
|
||||||
if(support_avx())
|
if(support_avx())
|
||||||
return CPUTYPE_STEAMROLLER;
|
return CPUTYPE_STEAMROLLER;
|
||||||
|
@ -1668,6 +1678,9 @@ int get_coretype(void){
|
||||||
#endif
|
#endif
|
||||||
else
|
else
|
||||||
return CORE_NEHALEM;
|
return CORE_NEHALEM;
|
||||||
|
case 13:
|
||||||
|
// Avoton
|
||||||
|
return CORE_NEHALEM;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case 5:
|
case 5:
|
||||||
|
@ -1718,7 +1731,8 @@ int get_coretype(void){
|
||||||
return CORE_BULLDOZER;
|
return CORE_BULLDOZER;
|
||||||
else
|
else
|
||||||
return CORE_BARCELONA; //OS don't support AVX.
|
return CORE_BARCELONA; //OS don't support AVX.
|
||||||
case 2:
|
case 2: //AMD Piledriver
|
||||||
|
case 3: //AMD Richland
|
||||||
if(support_avx())
|
if(support_avx())
|
||||||
return CORE_PILEDRIVER;
|
return CORE_PILEDRIVER;
|
||||||
else
|
else
|
||||||
|
@ -1726,6 +1740,12 @@ int get_coretype(void){
|
||||||
|
|
||||||
case 0:
|
case 0:
|
||||||
switch(exmodel){
|
switch(exmodel){
|
||||||
|
case 1: //AMD Trinity
|
||||||
|
if(support_avx())
|
||||||
|
return CORE_PILEDRIVER;
|
||||||
|
else
|
||||||
|
return CORE_BARCELONA; //OS don't support AVX.
|
||||||
|
|
||||||
case 3:
|
case 3:
|
||||||
if(support_avx())
|
if(support_avx())
|
||||||
return CORE_STEAMROLLER;
|
return CORE_STEAMROLLER;
|
||||||
|
|
|
@ -1365,8 +1365,9 @@
|
||||||
*
|
*
|
||||||
150 CONTINUE
|
150 CONTINUE
|
||||||
WRITE( NOUT, FMT = 9996 )SNAME
|
WRITE( NOUT, FMT = 9996 )SNAME
|
||||||
CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
IF( TRACE )
|
||||||
$ M, N, ALPHA, LDA, LDB)
|
$ CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
||||||
|
$ M, N, ALPHA, LDA, LDB)
|
||||||
*
|
*
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
RETURN
|
RETURN
|
||||||
|
|
|
@ -1365,8 +1365,9 @@
|
||||||
*
|
*
|
||||||
150 CONTINUE
|
150 CONTINUE
|
||||||
WRITE( NOUT, FMT = 9996 )SNAME
|
WRITE( NOUT, FMT = 9996 )SNAME
|
||||||
CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
IF( TRACE )
|
||||||
$ M, N, ALPHA, LDA, LDB)
|
$ CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
||||||
|
$ M, N, ALPHA, LDA, LDB)
|
||||||
*
|
*
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
RETURN
|
RETURN
|
||||||
|
|
|
@ -1335,8 +1335,9 @@
|
||||||
*
|
*
|
||||||
150 CONTINUE
|
150 CONTINUE
|
||||||
WRITE( NOUT, FMT = 9996 )SNAME
|
WRITE( NOUT, FMT = 9996 )SNAME
|
||||||
CALL DPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
IF( TRACE )
|
||||||
$ M, N, ALPHA, LDA, LDB)
|
$ CALL DPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
||||||
|
$ M, N, ALPHA, LDA, LDB)
|
||||||
*
|
*
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
RETURN
|
RETURN
|
||||||
|
|
|
@ -1339,8 +1339,9 @@
|
||||||
*
|
*
|
||||||
150 CONTINUE
|
150 CONTINUE
|
||||||
WRITE( NOUT, FMT = 9996 )SNAME
|
WRITE( NOUT, FMT = 9996 )SNAME
|
||||||
CALL SPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
IF( TRACE )
|
||||||
$ M, N, ALPHA, LDA, LDB)
|
$ CALL SPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
||||||
|
$ M, N, ALPHA, LDA, LDB)
|
||||||
*
|
*
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
RETURN
|
RETURN
|
||||||
|
|
|
@ -1350,7 +1350,7 @@
|
||||||
*
|
*
|
||||||
* Call the subroutine.
|
* Call the subroutine.
|
||||||
*
|
*
|
||||||
IF( SNAME( 4: 5 ).EQ.'mv' )THEN
|
IF( SNAME( 10: 11 ).EQ.'mv' )THEN
|
||||||
IF( FULL )THEN
|
IF( FULL )THEN
|
||||||
IF( TRACE )
|
IF( TRACE )
|
||||||
$ WRITE( NTRA, FMT = 9993 )NC, SNAME,
|
$ WRITE( NTRA, FMT = 9993 )NC, SNAME,
|
||||||
|
@ -1376,7 +1376,7 @@
|
||||||
CALL CZTPMV( IORDER, UPLO, TRANS, DIAG,
|
CALL CZTPMV( IORDER, UPLO, TRANS, DIAG,
|
||||||
$ N, AA, XX, INCX )
|
$ N, AA, XX, INCX )
|
||||||
END IF
|
END IF
|
||||||
ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN
|
ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN
|
||||||
IF( FULL )THEN
|
IF( FULL )THEN
|
||||||
IF( TRACE )
|
IF( TRACE )
|
||||||
$ WRITE( NTRA, FMT = 9993 )NC, SNAME,
|
$ WRITE( NTRA, FMT = 9993 )NC, SNAME,
|
||||||
|
@ -1465,7 +1465,7 @@
|
||||||
END IF
|
END IF
|
||||||
*
|
*
|
||||||
IF( .NOT.NULL )THEN
|
IF( .NOT.NULL )THEN
|
||||||
IF( SNAME( 4: 5 ).EQ.'mv' )THEN
|
IF( SNAME( 10: 11 ).EQ.'mv' )THEN
|
||||||
*
|
*
|
||||||
* Check the result.
|
* Check the result.
|
||||||
*
|
*
|
||||||
|
@ -1473,7 +1473,7 @@
|
||||||
$ INCX, ZERO, Z, INCX, XT, G,
|
$ INCX, ZERO, Z, INCX, XT, G,
|
||||||
$ XX, EPS, ERR, FATAL, NOUT,
|
$ XX, EPS, ERR, FATAL, NOUT,
|
||||||
$ .TRUE. )
|
$ .TRUE. )
|
||||||
ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN
|
ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN
|
||||||
*
|
*
|
||||||
* Compute approximation to original vector.
|
* Compute approximation to original vector.
|
||||||
*
|
*
|
||||||
|
@ -1611,7 +1611,7 @@
|
||||||
* .. Common blocks ..
|
* .. Common blocks ..
|
||||||
COMMON /INFOC/INFOT, NOUTC, OK
|
COMMON /INFOC/INFOT, NOUTC, OK
|
||||||
* .. Executable Statements ..
|
* .. Executable Statements ..
|
||||||
CONJ = SNAME( 5: 5 ).EQ.'c'
|
CONJ = SNAME( 11: 11 ).EQ.'c'
|
||||||
* Define the number of arguments.
|
* Define the number of arguments.
|
||||||
NARGS = 9
|
NARGS = 9
|
||||||
*
|
*
|
||||||
|
|
|
@ -1366,8 +1366,9 @@
|
||||||
*
|
*
|
||||||
150 CONTINUE
|
150 CONTINUE
|
||||||
WRITE( NOUT, FMT = 9996 )SNAME
|
WRITE( NOUT, FMT = 9996 )SNAME
|
||||||
CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
IF( TRACE )
|
||||||
$ M, N, ALPHA, LDA, LDB)
|
$ CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
||||||
|
$ M, N, ALPHA, LDA, LDB)
|
||||||
*
|
*
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
RETURN
|
RETURN
|
||||||
|
|
|
@ -1366,8 +1366,9 @@
|
||||||
*
|
*
|
||||||
150 CONTINUE
|
150 CONTINUE
|
||||||
WRITE( NOUT, FMT = 9996 )SNAME
|
WRITE( NOUT, FMT = 9996 )SNAME
|
||||||
CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
IF( TRACE )
|
||||||
$ M, N, ALPHA, LDA, LDB)
|
$ CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG,
|
||||||
|
$ M, N, ALPHA, LDA, LDB)
|
||||||
*
|
*
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
RETURN
|
RETURN
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
|
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
|
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'ZBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'ZBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
|
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED)
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE
|
||||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0)
|
||||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD.
|
||||||
F LOGICAL FLAG, T TO STOP ON FAILURES.
|
T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
|
|
|
@ -55,7 +55,7 @@
|
||||||
static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){
|
static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
BLASLONG incx, incy;
|
BLASLONG incx;
|
||||||
BLASLONG m_from, m_to, i;
|
BLASLONG m_from, m_to, i;
|
||||||
#ifndef COMPLEX
|
#ifndef COMPLEX
|
||||||
FLOAT result;
|
FLOAT result;
|
||||||
|
@ -68,7 +68,6 @@ static int spmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||||
y = (FLOAT *)args -> c;
|
y = (FLOAT *)args -> c;
|
||||||
|
|
||||||
incx = args -> ldb;
|
incx = args -> ldb;
|
||||||
incy = args -> ldc;
|
|
||||||
|
|
||||||
m_from = 0;
|
m_from = 0;
|
||||||
m_to = args -> m;
|
m_to = args -> m;
|
||||||
|
|
|
@ -43,7 +43,7 @@
|
||||||
static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){
|
static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
BLASLONG lda, incx, incy;
|
BLASLONG incx, incy;
|
||||||
BLASLONG i, m_from, m_to;
|
BLASLONG i, m_from, m_to;
|
||||||
FLOAT alpha_r;
|
FLOAT alpha_r;
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
|
@ -56,7 +56,6 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
|
||||||
|
|
||||||
incx = args -> lda;
|
incx = args -> lda;
|
||||||
incy = args -> ldb;
|
incy = args -> ldb;
|
||||||
lda = args -> ldc;
|
|
||||||
|
|
||||||
alpha_r = *((FLOAT *)args -> alpha + 0);
|
alpha_r = *((FLOAT *)args -> alpha + 0);
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
|
|
|
@ -46,7 +46,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
|
||||||
BLASLONG incx;
|
BLASLONG incx;
|
||||||
BLASLONG i, m_from, m_to;
|
BLASLONG i, m_from, m_to;
|
||||||
FLOAT alpha_r;
|
FLOAT alpha_r;
|
||||||
#if defined(COMPLEX) && !defined(HER) && !defined(HERREV)
|
#if defined(COMPLEX) && !defined(HEMV) && !defined(HEMVREV)
|
||||||
FLOAT alpha_i;
|
FLOAT alpha_i;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -56,7 +56,7 @@ static int syr_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FL
|
||||||
incx = args -> lda;
|
incx = args -> lda;
|
||||||
|
|
||||||
alpha_r = *((FLOAT *)args -> alpha + 0);
|
alpha_r = *((FLOAT *)args -> alpha + 0);
|
||||||
#if defined(COMPLEX) && !defined(HER) && !defined(HERREV)
|
#if defined(COMPLEX) && !defined(HEMV) && !defined(HEMVREV)
|
||||||
alpha_i = *((FLOAT *)args -> alpha + 1);
|
alpha_i = *((FLOAT *)args -> alpha + 1);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -55,7 +55,7 @@
|
||||||
static int symv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){
|
static int symv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *buffer, BLASLONG pos){
|
||||||
|
|
||||||
FLOAT *a, *x, *y;
|
FLOAT *a, *x, *y;
|
||||||
BLASLONG lda, incx, incy;
|
BLASLONG lda, incx;
|
||||||
BLASLONG m_from, m_to;
|
BLASLONG m_from, m_to;
|
||||||
|
|
||||||
a = (FLOAT *)args -> a;
|
a = (FLOAT *)args -> a;
|
||||||
|
@ -64,7 +64,6 @@ static int symv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||||
|
|
||||||
lda = args -> lda;
|
lda = args -> lda;
|
||||||
incx = args -> ldb;
|
incx = args -> ldb;
|
||||||
incy = args -> ldc;
|
|
||||||
|
|
||||||
m_from = 0;
|
m_from = 0;
|
||||||
m_to = args -> m;
|
m_to = args -> m;
|
||||||
|
|
|
@ -45,13 +45,11 @@ const static FLOAT dp1 = 1.;
|
||||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
BLASLONG length;
|
BLASLONG length;
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095);
|
|
||||||
COPY_K(n, b, incb, buffer, 1);
|
COPY_K(n, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,13 +45,11 @@ const static FLOAT dp1 = 1.;
|
||||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
BLASLONG length;
|
BLASLONG length;
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095);
|
|
||||||
COPY_K(n, b, incb, buffer, 1);
|
COPY_K(n, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,13 +45,11 @@ const static FLOAT dp1 = 1.;
|
||||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
BLASLONG length;
|
BLASLONG length;
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095);
|
|
||||||
COPY_K(n, b, incb, buffer, 1);
|
COPY_K(n, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,13 +45,11 @@ const static FLOAT dp1 = 1.;
|
||||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
BLASLONG length;
|
BLASLONG length;
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) + 4095) & ~4095);
|
|
||||||
COPY_K(n, b, incb, buffer, 1);
|
COPY_K(n, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,12 +43,10 @@
|
||||||
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095);
|
|
||||||
COPY_K(m, b, incb, buffer, 1);
|
COPY_K(m, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -43,12 +43,10 @@
|
||||||
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) + 4095) & ~4095);
|
|
||||||
COPY_K(m, b, incb, buffer, 1);
|
COPY_K(m, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -119,7 +119,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
x = buffer;
|
x = buffer;
|
||||||
buffer += ((COMPSIZE * args -> m + 1023) & ~1023);
|
buffer += ((COMPSIZE * args -> m + 3) & ~3);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef TRANS
|
#ifndef TRANS
|
||||||
|
@ -403,7 +403,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
|
||||||
|
|
||||||
if (num_cpu) {
|
if (num_cpu) {
|
||||||
queue[0].sa = NULL;
|
queue[0].sa = NULL;
|
||||||
queue[0].sb = buffer + num_cpu * (((m + 255) & ~255) + 16) * COMPSIZE;
|
queue[0].sb = buffer + num_cpu * (((m + 3) & ~3) + 16) * COMPSIZE;
|
||||||
|
|
||||||
queue[num_cpu - 1].next = NULL;
|
queue[num_cpu - 1].next = NULL;
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,6 @@ const static FLOAT dp1 = 1.;
|
||||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
BLASLONG length;
|
BLASLONG length;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
|
@ -57,7 +56,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE+ 4095) & ~4095);
|
|
||||||
COPY_K(n, b, incb, buffer, 1);
|
COPY_K(n, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,6 @@ const static FLOAT dp1 = 1.;
|
||||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
BLASLONG length;
|
BLASLONG length;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
|
@ -57,7 +56,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
|
||||||
COPY_K(n, b, incb, buffer, 1);
|
COPY_K(n, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,6 @@ const static FLOAT dp1 = 1.;
|
||||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
BLASLONG length;
|
BLASLONG length;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
|
@ -57,7 +56,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE + 4095) & ~4095);
|
|
||||||
COPY_K(n, b, incb, buffer, 1);
|
COPY_K(n, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -45,7 +45,6 @@ const static FLOAT dp1 = 1.;
|
||||||
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
|
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
BLASLONG length;
|
BLASLONG length;
|
||||||
#if (TRANSA == 2) || (TRANSA == 4)
|
#if (TRANSA == 2) || (TRANSA == 4)
|
||||||
|
@ -57,7 +56,6 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG inc
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + n * sizeof(FLOAT) * COMPSIZE+ 4095) & ~4095);
|
|
||||||
COPY_K(n, b, incb, buffer, 1);
|
COPY_K(n, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -49,12 +49,10 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||||
#endif
|
#endif
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
|
||||||
COPY_K(m, b, incb, buffer, 1);
|
COPY_K(m, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -49,12 +49,10 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT atemp1, atemp2, btemp1, btemp2;
|
FLOAT atemp1, atemp2, btemp1, btemp2;
|
||||||
#endif
|
#endif
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
|
||||||
COPY_K(m, b, incb, buffer, 1);
|
COPY_K(m, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -51,12 +51,10 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT ar, ai, br, bi, ratio, den;
|
FLOAT ar, ai, br, bi, ratio, den;
|
||||||
#endif
|
#endif
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
|
||||||
COPY_K(m, b, incb, buffer, 1);
|
COPY_K(m, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -49,12 +49,10 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *b, BLASLONG incb, void *buffer){
|
||||||
#ifndef UNIT
|
#ifndef UNIT
|
||||||
FLOAT ar, ai, br, bi, ratio, den;
|
FLOAT ar, ai, br, bi, ratio, den;
|
||||||
#endif
|
#endif
|
||||||
FLOAT *gemvbuffer = (FLOAT *)buffer;
|
|
||||||
FLOAT *B = b;
|
FLOAT *B = b;
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
|
||||||
COPY_K(m, b, incb, buffer, 1);
|
COPY_K(m, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -56,7 +56,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 15) & ~15);
|
||||||
COPY_K(m, b, incb, buffer, 1);
|
COPY_K(m, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -56,7 +56,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu
|
||||||
|
|
||||||
if (incb != 1) {
|
if (incb != 1) {
|
||||||
B = buffer;
|
B = buffer;
|
||||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095);
|
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 15) & ~15);
|
||||||
COPY_K(m, b, incb, buffer, 1);
|
COPY_K(m, b, incb, buffer, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -48,8 +48,7 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
# TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination
|
# TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination
|
||||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type})
|
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type})
|
||||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type})
|
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type})
|
||||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type})
|
|
||||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type})
|
|
||||||
# Need to set CONJ for trmm and trsm
|
# Need to set CONJ for trmm and trsm
|
||||||
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type})
|
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type})
|
||||||
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type})
|
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type})
|
||||||
|
@ -72,6 +71,10 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})
|
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type})
|
||||||
|
|
||||||
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
|
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
|
||||||
|
#herk
|
||||||
|
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type})
|
||||||
|
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type})
|
||||||
|
|
||||||
#hemm
|
#hemm
|
||||||
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type})
|
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type})
|
||||||
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type})
|
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type})
|
||||||
|
@ -96,6 +99,17 @@ foreach (float_type ${FLOAT_TYPES})
|
||||||
endif()
|
endif()
|
||||||
endif ()
|
endif ()
|
||||||
endforeach ()
|
endforeach ()
|
||||||
|
|
||||||
|
# for gemm3m
|
||||||
|
if(USE_GEMM3M)
|
||||||
|
foreach (GEMM_DEFINE ${GEMM_DEFINES})
|
||||||
|
string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC)
|
||||||
|
GenerateNamedObjects("gemm3m.c" "${GEMM_DEFINE}" "gemm3m_${GEMM_DEFINE_LC}" false "" "" false ${float_type})
|
||||||
|
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3)
|
||||||
|
GenerateNamedObjects("gemm3m.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm3m_thread_${GEMM_DEFINE_LC}" false "" "" false ${float_type})
|
||||||
|
endif ()
|
||||||
|
endforeach ()
|
||||||
|
endif()
|
||||||
endif ()
|
endif ()
|
||||||
endforeach ()
|
endforeach ()
|
||||||
|
|
||||||
|
|
|
@ -65,7 +65,7 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
|
||||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||||
|
|
||||||
BLASLONG range_M[MAX_CPU_NUMBER + 1], range_N[MAX_CPU_NUMBER + 1];
|
BLASLONG range_M[MAX_CPU_NUMBER + 1], range_N[MAX_CPU_NUMBER + 1];
|
||||||
BLASLONG procs, total_procs, num_cpu_m, num_cpu_n;
|
BLASLONG procs, num_cpu_m, num_cpu_n;
|
||||||
|
|
||||||
BLASLONG width, i, j;
|
BLASLONG width, i, j;
|
||||||
BLASLONG divM, divN;
|
BLASLONG divM, divN;
|
||||||
|
|
|
@ -335,7 +335,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
||||||
else
|
else
|
||||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N;
|
||||||
|
else
|
||||||
|
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -230,7 +230,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
BLASLONG is, min_i, div_n;
|
BLASLONG is, min_i, div_n;
|
||||||
|
|
||||||
BLASLONG i, current;
|
BLASLONG i, current;
|
||||||
BLASLONG l1stride, l2size;
|
BLASLONG l1stride;
|
||||||
|
|
||||||
#ifdef TIMING
|
#ifdef TIMING
|
||||||
BLASULONG rpcc_counter;
|
BLASULONG rpcc_counter;
|
||||||
|
@ -298,8 +298,6 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
#endif
|
#endif
|
||||||
) return 0;
|
) return 0;
|
||||||
|
|
||||||
l2size = GEMM_P * GEMM_Q;
|
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld N_from : %ld N_to : %ld\n",
|
fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld N_from : %ld N_to : %ld\n",
|
||||||
mypos, m_from, m_to, n_from, n_to, N_from, N_to);
|
mypos, m_from, m_to, n_from, n_to, N_from, N_to);
|
||||||
|
@ -369,7 +367,9 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||||
|
|
||||||
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
|
||||||
else
|
else
|
||||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N;
|
||||||
|
else
|
||||||
|
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
|
||||||
|
|
||||||
|
|
||||||
START_RPCC();
|
START_RPCC();
|
||||||
|
@ -706,7 +706,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
||||||
n = n_to - n_from;
|
n = n_to - n_from;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) {
|
if ((m < nthreads * SWITCH_RATIO) || (n < nthreads * SWITCH_RATIO)) {
|
||||||
GEMM_LOCAL(args, range_m, range_n, sa, sb, 0);
|
GEMM_LOCAL(args, range_m, range_n, sa, sb, 0);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,6 +33,7 @@ set(COMMON_SOURCES
|
||||||
xerbla.c
|
xerbla.c
|
||||||
openblas_set_num_threads.c
|
openblas_set_num_threads.c
|
||||||
openblas_error_handle.c
|
openblas_error_handle.c
|
||||||
|
openblas_env.c
|
||||||
openblas_get_num_procs.c
|
openblas_get_num_procs.c
|
||||||
openblas_get_num_threads.c
|
openblas_get_num_threads.c
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
TOPDIR = ../..
|
TOPDIR = ../..
|
||||||
include ../../Makefile.system
|
include ../../Makefile.system
|
||||||
|
|
||||||
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_num_threads.$(SUFFIX) openblas_get_num_procs.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX) openblas_error_handle.$(SUFFIX)
|
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_num_threads.$(SUFFIX) openblas_get_num_procs.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX) openblas_error_handle.$(SUFFIX) openblas_env.$(SUFFIX)
|
||||||
|
|
||||||
#COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
#COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
||||||
|
|
||||||
|
@ -118,6 +118,9 @@ openblas_get_parallel.$(SUFFIX) : openblas_get_parallel.c
|
||||||
openblas_error_handle.$(SUFFIX) : openblas_error_handle.c
|
openblas_error_handle.$(SUFFIX) : openblas_error_handle.c
|
||||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
|
||||||
|
openblas_env.$(SUFFIX) : openblas_env.c
|
||||||
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
|
||||||
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
|
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||||
|
|
||||||
|
|
|
@ -70,7 +70,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) || defined(OS_ANDROID)
|
#if defined(OS_LINUX) || defined(OS_NETBSD) || defined(OS_DARWIN) || defined(OS_ANDROID) || defined(OS_SUNOS)
|
||||||
#include <dlfcn.h>
|
#include <dlfcn.h>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
|
@ -92,6 +92,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
extern unsigned int openblas_thread_timeout();
|
||||||
|
|
||||||
#ifdef SMP_SERVER
|
#ifdef SMP_SERVER
|
||||||
|
|
||||||
#undef MONITOR
|
#undef MONITOR
|
||||||
|
@ -524,6 +526,7 @@ static int blas_monitor(void *arg){
|
||||||
int blas_thread_init(void){
|
int blas_thread_init(void){
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
int ret;
|
int ret;
|
||||||
|
int thread_timeout_env;
|
||||||
#ifdef NEED_STACKATTR
|
#ifdef NEED_STACKATTR
|
||||||
pthread_attr_t attr;
|
pthread_attr_t attr;
|
||||||
#endif
|
#endif
|
||||||
|
@ -540,22 +543,12 @@ int blas_thread_init(void){
|
||||||
|
|
||||||
if (!blas_server_avail){
|
if (!blas_server_avail){
|
||||||
|
|
||||||
env_var_t p;
|
thread_timeout_env=openblas_thread_timeout();
|
||||||
|
if (thread_timeout_env>0) {
|
||||||
if (readenv(p,"THREAD_TIMEOUT")) {
|
if (thread_timeout_env < 4) thread_timeout_env = 4;
|
||||||
thread_timeout = atoi(p);
|
if (thread_timeout_env > 30) thread_timeout_env = 30;
|
||||||
if (thread_timeout < 4) thread_timeout = 4;
|
thread_timeout = (1 << thread_timeout_env);
|
||||||
if (thread_timeout > 30) thread_timeout = 30;
|
}
|
||||||
thread_timeout = (1 << thread_timeout);
|
|
||||||
}else{
|
|
||||||
if (readenv(p,"GOTO_THREAD_TIMEOUT")) {
|
|
||||||
thread_timeout = atoi(p);
|
|
||||||
if (thread_timeout < 4) thread_timeout = 4;
|
|
||||||
if (thread_timeout > 30) thread_timeout = 30;
|
|
||||||
thread_timeout = (1 << thread_timeout);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
for(i = 0; i < blas_num_threads - 1; i++){
|
for(i = 0; i < blas_num_threads - 1; i++){
|
||||||
|
|
||||||
|
@ -576,10 +569,12 @@ int blas_thread_init(void){
|
||||||
struct rlimit rlim;
|
struct rlimit rlim;
|
||||||
const char *msg = strerror(ret);
|
const char *msg = strerror(ret);
|
||||||
fprintf(STDERR, "OpenBLAS blas_thread_init: pthread_create: %s\n", msg);
|
fprintf(STDERR, "OpenBLAS blas_thread_init: pthread_create: %s\n", msg);
|
||||||
|
#ifdef RLIMIT_NPROC
|
||||||
if(0 == getrlimit(RLIMIT_NPROC, &rlim)) {
|
if(0 == getrlimit(RLIMIT_NPROC, &rlim)) {
|
||||||
fprintf(STDERR, "OpenBLAS blas_thread_init: RLIMIT_NPROC "
|
fprintf(STDERR, "OpenBLAS blas_thread_init: RLIMIT_NPROC "
|
||||||
"%ld current, %ld max\n", (long)(rlim.rlim_cur), (long)(rlim.rlim_max));
|
"%ld current, %ld max\n", (long)(rlim.rlim_cur), (long)(rlim.rlim_max));
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
if(0 != raise(SIGINT)) {
|
if(0 != raise(SIGINT)) {
|
||||||
fprintf(STDERR, "OpenBLAS blas_thread_init: calling exit(3)\n");
|
fprintf(STDERR, "OpenBLAS blas_thread_init: calling exit(3)\n");
|
||||||
exit(EXIT_FAILURE);
|
exit(EXIT_FAILURE);
|
||||||
|
|
|
@ -261,6 +261,11 @@ static gotoblas_t *get_coretype(void){
|
||||||
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
return &gotoblas_NEHALEM; //OS doesn't support AVX. Use old kernels.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
//Intel Avoton
|
||||||
|
if (model == 13) {
|
||||||
|
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
|
||||||
|
return &gotoblas_NEHALEM;
|
||||||
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
case 5:
|
case 5:
|
||||||
//Intel Broadwell
|
//Intel Broadwell
|
||||||
|
@ -318,7 +323,7 @@ static gotoblas_t *get_coretype(void){
|
||||||
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
|
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
|
||||||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
|
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
|
||||||
}
|
}
|
||||||
}else if(model == 2){
|
}else if(model == 2 || model == 3){
|
||||||
//AMD Bulldozer Opteron 6300 / Opteron 4300 / Opteron 3300
|
//AMD Bulldozer Opteron 6300 / Opteron 4300 / Opteron 3300
|
||||||
if(support_avx())
|
if(support_avx())
|
||||||
return &gotoblas_PILEDRIVER;
|
return &gotoblas_PILEDRIVER;
|
||||||
|
@ -327,7 +332,15 @@ static gotoblas_t *get_coretype(void){
|
||||||
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
|
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
|
||||||
}
|
}
|
||||||
}else if(model == 0){
|
}else if(model == 0){
|
||||||
if (exmodel == 3) {
|
if (exmodel == 1) {
|
||||||
|
//AMD Trinity
|
||||||
|
if(support_avx())
|
||||||
|
return &gotoblas_PILEDRIVER;
|
||||||
|
else{
|
||||||
|
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
|
||||||
|
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
|
||||||
|
}
|
||||||
|
}else if (exmodel == 3) {
|
||||||
//AMD STEAMROLLER
|
//AMD STEAMROLLER
|
||||||
if(support_avx())
|
if(support_avx())
|
||||||
return &gotoblas_STEAMROLLER;
|
return &gotoblas_STEAMROLLER;
|
||||||
|
@ -378,7 +391,7 @@ static char *corename[] = {
|
||||||
"Nehalem",
|
"Nehalem",
|
||||||
"Athlon",
|
"Athlon",
|
||||||
"Opteron",
|
"Opteron",
|
||||||
"Opteron(SSE3)",
|
"Opteron_SSE3",
|
||||||
"Barcelona",
|
"Barcelona",
|
||||||
"Nano",
|
"Nano",
|
||||||
"Sandybridge",
|
"Sandybridge",
|
||||||
|
|
|
@ -104,6 +104,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <linux/unistd.h>
|
#include <linux/unistd.h>
|
||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <sys/resource.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
|
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||||
|
@ -142,7 +144,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#if defined(_MSC_VER) && !defined(__clang__)
|
#if defined(_MSC_VER) && !defined(__clang__)
|
||||||
#define CONSTRUCTOR __cdecl
|
#define CONSTRUCTOR __cdecl
|
||||||
#define DESTRUCTOR __cdecl
|
#define DESTRUCTOR __cdecl
|
||||||
#elif defined(OS_DARWIN) && defined(C_GCC)
|
#elif (defined(OS_DARWIN) || defined(OS_SUNOS)) && defined(C_GCC)
|
||||||
#define CONSTRUCTOR __attribute__ ((constructor))
|
#define CONSTRUCTOR __attribute__ ((constructor))
|
||||||
#define DESTRUCTOR __attribute__ ((destructor))
|
#define DESTRUCTOR __attribute__ ((destructor))
|
||||||
#else
|
#else
|
||||||
|
@ -167,7 +169,7 @@ void goto_set_num_threads(int num_threads) {};
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
#ifdef OS_LINUX
|
#if defined(OS_LINUX) || defined(OS_SUNOS)
|
||||||
#ifndef NO_AFFINITY
|
#ifndef NO_AFFINITY
|
||||||
int get_num_procs(void);
|
int get_num_procs(void);
|
||||||
#else
|
#else
|
||||||
|
@ -292,8 +294,11 @@ void openblas_fork_handler()
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
extern int openblas_num_threads_env();
|
||||||
|
extern int openblas_goto_num_threads_env();
|
||||||
|
extern int openblas_omp_num_threads_env();
|
||||||
|
|
||||||
int blas_get_cpu_number(void){
|
int blas_get_cpu_number(void){
|
||||||
env_var_t p;
|
|
||||||
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID)
|
#if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) || defined(OS_ANDROID)
|
||||||
int max_num;
|
int max_num;
|
||||||
#endif
|
#endif
|
||||||
|
@ -308,18 +313,18 @@ int blas_get_cpu_number(void){
|
||||||
|
|
||||||
blas_goto_num = 0;
|
blas_goto_num = 0;
|
||||||
#ifndef USE_OPENMP
|
#ifndef USE_OPENMP
|
||||||
if (readenv(p,"OPENBLAS_NUM_THREADS")) blas_goto_num = atoi(p);
|
blas_goto_num=openblas_num_threads_env();
|
||||||
if (blas_goto_num < 0) blas_goto_num = 0;
|
if (blas_goto_num < 0) blas_goto_num = 0;
|
||||||
|
|
||||||
if (blas_goto_num == 0) {
|
if (blas_goto_num == 0) {
|
||||||
if (readenv(p,"GOTO_NUM_THREADS")) blas_goto_num = atoi(p);
|
blas_goto_num=openblas_goto_num_threads_env();
|
||||||
if (blas_goto_num < 0) blas_goto_num = 0;
|
if (blas_goto_num < 0) blas_goto_num = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
blas_omp_num = 0;
|
blas_omp_num = 0;
|
||||||
if (readenv(p,"OMP_NUM_THREADS")) blas_omp_num = atoi(p);
|
blas_omp_num=openblas_omp_num_threads_env();
|
||||||
if (blas_omp_num < 0) blas_omp_num = 0;
|
if (blas_omp_num < 0) blas_omp_num = 0;
|
||||||
|
|
||||||
if (blas_goto_num > 0) blas_num_threads = blas_goto_num;
|
if (blas_goto_num > 0) blas_num_threads = blas_goto_num;
|
||||||
|
@ -355,7 +360,9 @@ int openblas_get_num_threads(void) {
|
||||||
#ifndef SMP
|
#ifndef SMP
|
||||||
return 1;
|
return 1;
|
||||||
#else
|
#else
|
||||||
return blas_get_cpu_number();
|
// init blas_cpu_number if needed
|
||||||
|
blas_get_cpu_number();
|
||||||
|
return blas_cpu_number;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -914,7 +921,6 @@ static volatile struct {
|
||||||
} memory[NUM_BUFFERS];
|
} memory[NUM_BUFFERS];
|
||||||
|
|
||||||
static int memory_initialized = 0;
|
static int memory_initialized = 0;
|
||||||
static void gotoblas_memory_init(void);
|
|
||||||
|
|
||||||
/* Memory allocation routine */
|
/* Memory allocation routine */
|
||||||
/* procpos ... indicates where it comes from */
|
/* procpos ... indicates where it comes from */
|
||||||
|
@ -1337,6 +1343,7 @@ static void gotoblas_memory_init(void) {
|
||||||
/* Initialization for all function; this function should be called before main */
|
/* Initialization for all function; this function should be called before main */
|
||||||
|
|
||||||
static int gotoblas_initialized = 0;
|
static int gotoblas_initialized = 0;
|
||||||
|
extern void openblas_read_env();
|
||||||
|
|
||||||
void CONSTRUCTOR gotoblas_init(void) {
|
void CONSTRUCTOR gotoblas_init(void) {
|
||||||
|
|
||||||
|
@ -1346,6 +1353,8 @@ void CONSTRUCTOR gotoblas_init(void) {
|
||||||
openblas_fork_handler();
|
openblas_fork_handler();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
openblas_read_env();
|
||||||
|
|
||||||
#ifdef PROFILE
|
#ifdef PROFILE
|
||||||
moncontrol (0);
|
moncontrol (0);
|
||||||
#endif
|
#endif
|
||||||
|
@ -1362,6 +1371,19 @@ void CONSTRUCTOR gotoblas_init(void) {
|
||||||
gotoblas_memory_init();
|
gotoblas_memory_init();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
//#if defined(OS_LINUX)
|
||||||
|
#if 0
|
||||||
|
struct rlimit curlimit;
|
||||||
|
if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 )
|
||||||
|
{
|
||||||
|
if ( curlimit.rlim_cur != curlimit.rlim_max )
|
||||||
|
{
|
||||||
|
curlimit.rlim_cur = curlimit.rlim_max;
|
||||||
|
setrlimit(RLIMIT_STACK, &curlimit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
if (blas_cpu_number == 0) blas_get_cpu_number();
|
if (blas_cpu_number == 0) blas_get_cpu_number();
|
||||||
#ifdef SMP_SERVER
|
#ifdef SMP_SERVER
|
||||||
|
|
|
@ -0,0 +1,84 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2011-2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
static int openblas_env_verbose=0;
|
||||||
|
static unsigned int openblas_env_thread_timeout=0;
|
||||||
|
static int openblas_env_block_factor=0;
|
||||||
|
static int openblas_env_openblas_num_threads=0;
|
||||||
|
static int openblas_env_goto_num_threads=0;
|
||||||
|
static int openblas_env_omp_num_threads=0;
|
||||||
|
|
||||||
|
int openblas_verbose() { return openblas_env_verbose;}
|
||||||
|
unsigned int openblas_thread_timeout() { return openblas_env_thread_timeout;}
|
||||||
|
int openblas_block_factor() { return openblas_env_block_factor;}
|
||||||
|
int openblas_num_threads_env() { return openblas_env_openblas_num_threads;}
|
||||||
|
int openblas_goto_num_threads_env() { return openblas_env_goto_num_threads;}
|
||||||
|
int openblas_omp_num_threads_env() { return openblas_env_omp_num_threads;}
|
||||||
|
|
||||||
|
void openblas_read_env() {
|
||||||
|
int ret=0;
|
||||||
|
env_var_t p;
|
||||||
|
if (readenv(p,"OPENBLAS_VERBOSE")) ret = atoi(p);
|
||||||
|
if(ret<0) ret=0;
|
||||||
|
openblas_env_verbose=ret;
|
||||||
|
|
||||||
|
ret=0;
|
||||||
|
if (readenv(p,"OPENBLAS_BLOCK_FACTOR")) ret = atoi(p);
|
||||||
|
if(ret<0) ret=0;
|
||||||
|
openblas_env_block_factor=ret;
|
||||||
|
|
||||||
|
ret=0;
|
||||||
|
if (readenv(p,"OPENBLAS_THREAD_TIMEOUT")) ret = atoi(p);
|
||||||
|
if(ret<0) ret=0;
|
||||||
|
openblas_env_thread_timeout=(unsigned int)ret;
|
||||||
|
|
||||||
|
ret=0;
|
||||||
|
if (readenv(p,"OPENBLAS_NUM_THREADS")) ret = atoi(p);
|
||||||
|
if(ret<0) ret=0;
|
||||||
|
openblas_env_openblas_num_threads=ret;
|
||||||
|
|
||||||
|
ret=0;
|
||||||
|
if (readenv(p,"GOTO_NUM_THREADS")) ret = atoi(p);
|
||||||
|
if(ret<0) ret=0;
|
||||||
|
openblas_env_goto_num_threads=ret;
|
||||||
|
|
||||||
|
ret=0;
|
||||||
|
if (readenv(p,"OMP_NUM_THREADS")) ret = atoi(p);
|
||||||
|
if(ret<0) ret=0;
|
||||||
|
openblas_env_omp_num_threads=ret;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,13 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
int openblas_verbose() {
|
extern int openblas_verbose();
|
||||||
int ret=0;
|
|
||||||
env_var_t p;
|
|
||||||
if (readenv(p,"OPENBLAS_VERBOSE")) ret = atoi(p);
|
|
||||||
if(ret<0) ret=0;
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
void openblas_warning(int verbose, const char * msg) {
|
void openblas_warning(int verbose, const char * msg) {
|
||||||
int current_verbose;
|
int current_verbose;
|
||||||
|
|
|
@ -40,6 +40,7 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
extern int openblas_block_factor();
|
||||||
int get_L2_size(void);
|
int get_L2_size(void);
|
||||||
|
|
||||||
#define DEFAULT_GEMM_P 128
|
#define DEFAULT_GEMM_P 128
|
||||||
|
@ -249,7 +250,6 @@ int get_L2_size(void){
|
||||||
|
|
||||||
void blas_set_parameter(void){
|
void blas_set_parameter(void){
|
||||||
|
|
||||||
env_var_t p;
|
|
||||||
int factor;
|
int factor;
|
||||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER)
|
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER)
|
||||||
int size = 16;
|
int size = 16;
|
||||||
|
@ -468,9 +468,8 @@ void blas_set_parameter(void){
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
factor=openblas_block_factor();
|
||||||
if (readenv(p,"GOTO_BLOCK_FACTOR")) {
|
if (factor>0) {
|
||||||
factor = atoi(p);
|
|
||||||
if (factor < 10) factor = 10;
|
if (factor < 10) factor = 10;
|
||||||
if (factor > 200) factor = 200;
|
if (factor > 200) factor = 200;
|
||||||
|
|
||||||
|
|
|
@ -26,10 +26,16 @@ ifndef ONLY_CBLAS
|
||||||
ONLY_CBLAS = 0
|
ONLY_CBLAS = 0
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifndef BUILD_LAPACK_DEPRECATED
|
||||||
|
BUILD_LAPACK_DEPRECATED = 0
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), WINNT)
|
ifeq ($(OSNAME), WINNT)
|
||||||
ifeq ($(F_COMPILER), GFORTRAN)
|
ifeq ($(F_COMPILER), GFORTRAN)
|
||||||
|
ifndef ONLY_CBLAS
|
||||||
EXTRALIB += -lgfortran
|
EXTRALIB += -lgfortran
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
ifeq ($(C_COMPILER), GCC)
|
ifeq ($(C_COMPILER), GCC)
|
||||||
EXTRALIB += -lgomp
|
EXTRALIB += -lgomp
|
||||||
|
@ -39,9 +45,11 @@ endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), CYGWIN_NT)
|
ifeq ($(OSNAME), CYGWIN_NT)
|
||||||
ifeq ($(F_COMPILER), GFORTRAN)
|
ifeq ($(F_COMPILER), GFORTRAN)
|
||||||
|
ifndef ONLY_CBLAS
|
||||||
EXTRALIB += -lgfortran
|
EXTRALIB += -lgfortran
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
all::
|
all::
|
||||||
|
|
||||||
|
@ -88,17 +96,17 @@ dll : ../$(LIBDLLNAME)
|
||||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)
|
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)
|
||||||
|
|
||||||
libopenblas.def : gensymbol
|
libopenblas.def : gensymbol
|
||||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F)
|
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
|
||||||
|
|
||||||
libgoto_hpl.def : gensymbol
|
libgoto_hpl.def : gensymbol
|
||||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F)
|
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
|
||||||
|
|
||||||
ifeq (, $(SYMBOLPREFIX)$(SYMBOLSUFFIX))
|
ifeq (, $(SYMBOLPREFIX)$(SYMBOLSUFFIX))
|
||||||
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
|
$(LIBDYNNAME) : ../$(LIBNAME) osx.def
|
||||||
else
|
else
|
||||||
../$(LIBNAME).renamed : ../$(LIBNAME) objconv.def
|
../$(LIBNAME).osx.renamed : ../$(LIBNAME) objconv.def
|
||||||
$(OBJCONV) @objconv.def ../$(LIBNAME) ../$(LIBNAME).renamed
|
$(OBJCONV) @objconv.def ../$(LIBNAME) ../$(LIBNAME).osx.renamed
|
||||||
$(LIBDYNNAME) : ../$(LIBNAME).renamed osx.def
|
$(LIBDYNNAME) : ../$(LIBNAME).osx.renamed osx.def
|
||||||
endif
|
endif
|
||||||
ifeq ($(NOFORTRAN), $(filter $(NOFORTRAN),1 2))
|
ifeq ($(NOFORTRAN), $(filter $(NOFORTRAN),1 2))
|
||||||
#only build without Fortran
|
#only build without Fortran
|
||||||
|
@ -110,7 +118,7 @@ endif
|
||||||
dllinit.$(SUFFIX) : dllinit.c
|
dllinit.$(SUFFIX) : dllinit.c
|
||||||
$(CC) $(CFLAGS) -c -o $(@F) -s $<
|
$(CC) $(CFLAGS) -c -o $(@F) -s $<
|
||||||
|
|
||||||
ifeq ($(OSNAME), Linux)
|
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS))
|
||||||
|
|
||||||
so : ../$(LIBSONAME)
|
so : ../$(LIBSONAME)
|
||||||
|
|
||||||
|
@ -201,26 +209,26 @@ static : ../$(LIBNAME)
|
||||||
rm -f goto.$(SUFFIX)
|
rm -f goto.$(SUFFIX)
|
||||||
|
|
||||||
osx.def : gensymbol ../Makefile.system ../getarch.c
|
osx.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F)
|
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
|
||||||
|
|
||||||
aix.def : gensymbol ../Makefile.system ../getarch.c
|
aix.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F)
|
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
|
||||||
|
|
||||||
objcopy.def : gensymbol ../Makefile.system ../getarch.c
|
objcopy.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F)
|
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
|
||||||
|
|
||||||
objconv.def : gensymbol ../Makefile.system ../getarch.c
|
objconv.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > $(@F)
|
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
|
||||||
|
|
||||||
test : linktest.c
|
test : linktest.c
|
||||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
||||||
rm -f linktest
|
rm -f linktest
|
||||||
|
|
||||||
linktest.c : gensymbol ../Makefile.system ../getarch.c
|
linktest.c : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" > linktest.c
|
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > linktest.c
|
||||||
|
|
||||||
clean ::
|
clean ::
|
||||||
@rm -f *.def *.dylib __.SYMDEF*
|
@rm -f *.def *.dylib __.SYMDEF* *.renamed
|
||||||
|
|
||||||
include ../Makefile.tail
|
include ../Makefile.tail
|
||||||
|
|
||||||
|
|
|
@ -173,18 +173,18 @@
|
||||||
sgbbrd, sgbcon, sgbequ, sgbrfs, sgbsv,
|
sgbbrd, sgbcon, sgbequ, sgbrfs, sgbsv,
|
||||||
sgbsvx, sgbtf2, sgbtrf, sgbtrs, sgebak, sgebal, sgebd2,
|
sgbsvx, sgbtf2, sgbtrf, sgbtrs, sgebak, sgebal, sgebd2,
|
||||||
sgebrd, sgecon, sgeequ, sgees, sgeesx, sgeev, sgeevx,
|
sgebrd, sgecon, sgeequ, sgees, sgeesx, sgeev, sgeevx,
|
||||||
sgegs, sgegv, sgehd2, sgehrd, sgelq2, sgelqf,
|
sgehd2, sgehrd, sgelq2, sgelqf,
|
||||||
sgels, sgelsd, sgelss, sgelsx, sgelsy, sgeql2, sgeqlf,
|
sgels, sgelsd, sgelss, sgelsy, sgeql2, sgeqlf,
|
||||||
sgeqp3, sgeqpf, sgeqr2, sgeqr2p, sgeqrf, sgeqrfp, sgerfs,
|
sgeqp3, sgeqr2, sgeqr2p, sgeqrf, sgeqrfp, sgerfs,
|
||||||
sgerq2, sgerqf, sgesc2, sgesdd, sgesvd, sgesvx,
|
sgerq2, sgerqf, sgesc2, sgesdd, sgesvd, sgesvx,
|
||||||
sgetc2, sgetri,
|
sgetc2, sgetri,
|
||||||
sggbak, sggbal, sgges, sggesx, sggev, sggevx,
|
sggbak, sggbal, sgges, sggesx, sggev, sggevx,
|
||||||
sggglm, sgghrd, sgglse, sggqrf,
|
sggglm, sgghrd, sgglse, sggqrf,
|
||||||
sggrqf, sggsvd, sggsvp, sgtcon, sgtrfs, sgtsv,
|
sggrqf, sgtcon, sgtrfs, sgtsv,
|
||||||
sgtsvx, sgttrf, sgttrs, sgtts2, shgeqz,
|
sgtsvx, sgttrf, sgttrs, sgtts2, shgeqz,
|
||||||
shsein, shseqr, slabrd, slacon, slacn2,
|
shsein, shseqr, slabrd, slacon, slacn2,
|
||||||
slaein, slaexc, slag2, slags2, slagtm, slagv2, slahqr,
|
slaein, slaexc, slag2, slags2, slagtm, slagv2, slahqr,
|
||||||
slahrd, slahr2, slaic1, slaln2, slals0, slalsa, slalsd,
|
slahr2, slaic1, slaln2, slals0, slalsa, slalsd,
|
||||||
slangb, slange, slangt, slanhs, slansb, slansp,
|
slangb, slange, slangt, slanhs, slansb, slansp,
|
||||||
slansy, slantb, slantp, slantr, slanv2,
|
slansy, slantb, slantp, slantr, slanv2,
|
||||||
slapll, slapmt,
|
slapll, slapmt,
|
||||||
|
@ -194,7 +194,7 @@
|
||||||
slarf, slarfb, slarfg, slarfgp, slarft, slarfx, slargv,
|
slarf, slarfb, slarfg, slarfgp, slarft, slarfx, slargv,
|
||||||
slarrv, slartv,
|
slarrv, slartv,
|
||||||
slarz, slarzb, slarzt, slasy2, slasyf,
|
slarz, slarzb, slarzt, slasy2, slasyf,
|
||||||
slatbs, slatdf, slatps, slatrd, slatrs, slatrz, slatzm,
|
slatbs, slatdf, slatps, slatrd, slatrs, slatrz,
|
||||||
sopgtr, sopmtr, sorg2l, sorg2r,
|
sopgtr, sopmtr, sorg2l, sorg2r,
|
||||||
sorgbr, sorghr, sorgl2, sorglq, sorgql, sorgqr, sorgr2,
|
sorgbr, sorghr, sorgl2, sorglq, sorgql, sorgqr, sorgr2,
|
||||||
sorgrq, sorgtr, sorm2l, sorm2r,
|
sorgrq, sorgtr, sorm2l, sorm2r,
|
||||||
|
@ -220,7 +220,7 @@
|
||||||
stgsja, stgsna, stgsy2, stgsyl, stpcon, stprfs, stptri,
|
stgsja, stgsna, stgsy2, stgsyl, stpcon, stprfs, stptri,
|
||||||
stptrs,
|
stptrs,
|
||||||
strcon, strevc, strexc, strrfs, strsen, strsna, strsyl,
|
strcon, strevc, strexc, strrfs, strsen, strsna, strsyl,
|
||||||
strtrs, stzrqf, stzrzf, sstemr,
|
strtrs, stzrzf, sstemr,
|
||||||
slansf, spftrf, spftri, spftrs, ssfrk, stfsm, stftri, stfttp,
|
slansf, spftrf, spftri, spftrs, ssfrk, stfsm, stftri, stfttp,
|
||||||
stfttr, stpttf, stpttr, strttf, strttp,
|
stfttr, stpttf, stpttr, strttf, strttp,
|
||||||
sgejsv, sgesvj, sgsvj0, sgsvj1,
|
sgejsv, sgesvj, sgsvj0, sgsvj1,
|
||||||
|
@ -245,14 +245,13 @@
|
||||||
cbdsqr, cgbbrd, cgbcon, cgbequ, cgbrfs, cgbsv, cgbsvx,
|
cbdsqr, cgbbrd, cgbcon, cgbequ, cgbrfs, cgbsv, cgbsvx,
|
||||||
cgbtf2, cgbtrf, cgbtrs, cgebak, cgebal, cgebd2, cgebrd,
|
cgbtf2, cgbtrf, cgbtrs, cgebak, cgebal, cgebd2, cgebrd,
|
||||||
cgecon, cgeequ, cgees, cgeesx, cgeev, cgeevx,
|
cgecon, cgeequ, cgees, cgeesx, cgeev, cgeevx,
|
||||||
cgegs, cgegv, cgehd2, cgehrd, cgelq2, cgelqf,
|
cgehd2, cgehrd, cgelq2, cgelqf,
|
||||||
cgels, cgelsd, cgelss, cgelsx, cgelsy, cgeql2, cgeqlf, cgeqp3,
|
cgels, cgelsd, cgelss, cgelsy, cgeql2, cgeqlf, cgeqp3,
|
||||||
cgeqpf, cgeqr2, cgeqr2p, cgeqrf, cgeqrfp, cgerfs,
|
cgeqr2, cgeqr2p, cgeqrf, cgeqrfp, cgerfs,
|
||||||
cgerq2, cgerqf, cgesc2, cgesdd, cgesvd,
|
cgerq2, cgerqf, cgesc2, cgesdd, cgesvd,
|
||||||
cgesvx, cgetc2, cgetri,
|
cgesvx, cgetc2, cgetri,
|
||||||
cggbak, cggbal, cgges, cggesx, cggev, cggevx, cggglm,
|
cggbak, cggbal, cgges, cggesx, cggev, cggevx, cggglm,
|
||||||
cgghrd, cgglse, cggqrf, cggrqf,
|
cgghrd, cgglse, cggqrf, cggrqf,
|
||||||
cggsvd, cggsvp,
|
|
||||||
cgtcon, cgtrfs, cgtsv, cgtsvx, cgttrf, cgttrs, cgtts2, chbev,
|
cgtcon, cgtrfs, cgtsv, cgtsvx, cgttrf, cgttrs, cgtts2, chbev,
|
||||||
chbevd, chbevx, chbgst, chbgv, chbgvd, chbgvx, chbtrd,
|
chbevd, chbevx, chbgst, chbgv, chbgvd, chbgvx, chbtrd,
|
||||||
checon, cheev, cheevd, cheevr, cheevx, chegs2, chegst,
|
checon, cheev, cheevd, cheevr, cheevx, chegs2, chegst,
|
||||||
|
@ -267,7 +266,7 @@
|
||||||
claed0, claed7, claed8,
|
claed0, claed7, claed8,
|
||||||
claein, claesy, claev2, clags2, clagtm,
|
claein, claesy, claev2, clags2, clagtm,
|
||||||
clahef, clahqr,
|
clahef, clahqr,
|
||||||
clahrd, clahr2, claic1, clals0, clalsa, clalsd, clangb, clange, clangt,
|
clahr2, claic1, clals0, clalsa, clalsd, clangb, clange, clangt,
|
||||||
clanhb, clanhe,
|
clanhb, clanhe,
|
||||||
clanhp, clanhs, clanht, clansb, clansp, clansy, clantb,
|
clanhp, clanhs, clanht, clansb, clansp, clansy, clantb,
|
||||||
clantp, clantr, clapll, clapmt, clarcm, claqgb, claqge,
|
clantp, clantr, clapll, clapmt, clarcm, claqgb, claqge,
|
||||||
|
@ -278,7 +277,7 @@
|
||||||
clarfx, clargv, clarnv, clarrv, clartg, clartv,
|
clarfx, clargv, clarnv, clarrv, clartg, clartv,
|
||||||
clarz, clarzb, clarzt, clascl, claset, clasr, classq,
|
clarz, clarzb, clarzt, clascl, claset, clasr, classq,
|
||||||
clasyf, clatbs, clatdf, clatps, clatrd, clatrs, clatrz,
|
clasyf, clatbs, clatdf, clatps, clatrd, clatrs, clatrz,
|
||||||
clatzm, cpbcon, cpbequ, cpbrfs, cpbstf, cpbsv,
|
cpbcon, cpbequ, cpbrfs, cpbstf, cpbsv,
|
||||||
cpbsvx, cpbtf2, cpbtrf, cpbtrs, cpocon, cpoequ, cporfs,
|
cpbsvx, cpbtf2, cpbtrf, cpbtrs, cpocon, cpoequ, cporfs,
|
||||||
cposv, cposvx, cpstrf, cpstf2,
|
cposv, cposvx, cpstrf, cpstf2,
|
||||||
cppcon, cppequ, cpprfs, cppsv, cppsvx, cpptrf, cpptri, cpptrs,
|
cppcon, cppequ, cpprfs, cppsv, cppsvx, cpptrf, cpptri, cpptrs,
|
||||||
|
@ -293,7 +292,7 @@
|
||||||
ctgexc, ctgsen, ctgsja, ctgsna, ctgsy2, ctgsyl, ctpcon,
|
ctgexc, ctgsen, ctgsja, ctgsna, ctgsy2, ctgsyl, ctpcon,
|
||||||
ctprfs, ctptri,
|
ctprfs, ctptri,
|
||||||
ctptrs, ctrcon, ctrevc, ctrexc, ctrrfs, ctrsen, ctrsna,
|
ctptrs, ctrcon, ctrevc, ctrexc, ctrrfs, ctrsen, ctrsna,
|
||||||
ctrsyl, ctrtrs, ctzrqf, ctzrzf, cung2l, cung2r,
|
ctrsyl, ctrtrs, ctzrzf, cung2l, cung2r,
|
||||||
cungbr, cunghr, cungl2, cunglq, cungql, cungqr, cungr2,
|
cungbr, cunghr, cungl2, cunglq, cungql, cungqr, cungr2,
|
||||||
cungrq, cungtr, cunm2l, cunm2r, cunmbr, cunmhr, cunml2,
|
cungrq, cungtr, cunm2l, cunm2r, cunmbr, cunmhr, cunml2,
|
||||||
cunmlq, cunmql, cunmqr, cunmr2, cunmr3, cunmrq, cunmrz,
|
cunmlq, cunmql, cunmqr, cunmr2, cunmr3, cunmrq, cunmrz,
|
||||||
|
@ -321,18 +320,18 @@
|
||||||
dgbbrd, dgbcon, dgbequ, dgbrfs, dgbsv,
|
dgbbrd, dgbcon, dgbequ, dgbrfs, dgbsv,
|
||||||
dgbsvx, dgbtf2, dgbtrf, dgbtrs, dgebak, dgebal, dgebd2,
|
dgbsvx, dgbtf2, dgbtrf, dgbtrs, dgebak, dgebal, dgebd2,
|
||||||
dgebrd, dgecon, dgeequ, dgees, dgeesx, dgeev, dgeevx,
|
dgebrd, dgecon, dgeequ, dgees, dgeesx, dgeev, dgeevx,
|
||||||
dgegs, dgegv, dgehd2, dgehrd, dgelq2, dgelqf,
|
dgehd2, dgehrd, dgelq2, dgelqf,
|
||||||
dgels, dgelsd, dgelss, dgelsx, dgelsy, dgeql2, dgeqlf,
|
dgels, dgelsd, dgelss, dgelsy, dgeql2, dgeqlf,
|
||||||
dgeqp3, dgeqpf, dgeqr2, dgeqr2p, dgeqrf, dgeqrfp, dgerfs,
|
dgeqp3, dgeqr2, dgeqr2p, dgeqrf, dgeqrfp, dgerfs,
|
||||||
dgerq2, dgerqf, dgesc2, dgesdd, dgesvd, dgesvx,
|
dgerq2, dgerqf, dgesc2, dgesdd, dgesvd, dgesvx,
|
||||||
dgetc2, dgetri,
|
dgetc2, dgetri,
|
||||||
dggbak, dggbal, dgges, dggesx, dggev, dggevx,
|
dggbak, dggbal, dgges, dggesx, dggev, dggevx,
|
||||||
dggglm, dgghrd, dgglse, dggqrf,
|
dggglm, dgghrd, dgglse, dggqrf,
|
||||||
dggrqf, dggsvd, dggsvp, dgtcon, dgtrfs, dgtsv,
|
dggrqf, dgtcon, dgtrfs, dgtsv,
|
||||||
dgtsvx, dgttrf, dgttrs, dgtts2, dhgeqz,
|
dgtsvx, dgttrf, dgttrs, dgtts2, dhgeqz,
|
||||||
dhsein, dhseqr, dlabrd, dlacon, dlacn2,
|
dhsein, dhseqr, dlabrd, dlacon, dlacn2,
|
||||||
dlaein, dlaexc, dlag2, dlags2, dlagtm, dlagv2, dlahqr,
|
dlaein, dlaexc, dlag2, dlags2, dlagtm, dlagv2, dlahqr,
|
||||||
dlahrd, dlahr2, dlaic1, dlaln2, dlals0, dlalsa, dlalsd,
|
dlahr2, dlaic1, dlaln2, dlals0, dlalsa, dlalsd,
|
||||||
dlangb, dlange, dlangt, dlanhs, dlansb, dlansp,
|
dlangb, dlange, dlangt, dlanhs, dlansb, dlansp,
|
||||||
dlansy, dlantb, dlantp, dlantr, dlanv2,
|
dlansy, dlantb, dlantp, dlantr, dlanv2,
|
||||||
dlapll, dlapmt,
|
dlapll, dlapmt,
|
||||||
|
@ -342,7 +341,7 @@
|
||||||
dlarf, dlarfb, dlarfg, dlarfgp, dlarft, dlarfx,
|
dlarf, dlarfb, dlarfg, dlarfgp, dlarft, dlarfx,
|
||||||
dlargv, dlarrv, dlartv,
|
dlargv, dlarrv, dlartv,
|
||||||
dlarz, dlarzb, dlarzt, dlasy2, dlasyf,
|
dlarz, dlarzb, dlarzt, dlasy2, dlasyf,
|
||||||
dlatbs, dlatdf, dlatps, dlatrd, dlatrs, dlatrz, dlatzm,
|
dlatbs, dlatdf, dlatps, dlatrd, dlatrs, dlatrz,
|
||||||
dopgtr, dopmtr, dorg2l, dorg2r,
|
dopgtr, dopmtr, dorg2l, dorg2r,
|
||||||
dorgbr, dorghr, dorgl2, dorglq, dorgql, dorgqr, dorgr2,
|
dorgbr, dorghr, dorgl2, dorglq, dorgql, dorgqr, dorgr2,
|
||||||
dorgrq, dorgtr, dorm2l, dorm2r,
|
dorgrq, dorgtr, dorm2l, dorm2r,
|
||||||
|
@ -368,7 +367,7 @@
|
||||||
dtgsja, dtgsna, dtgsy2, dtgsyl, dtpcon, dtprfs, dtptri,
|
dtgsja, dtgsna, dtgsy2, dtgsyl, dtpcon, dtprfs, dtptri,
|
||||||
dtptrs,
|
dtptrs,
|
||||||
dtrcon, dtrevc, dtrexc, dtrrfs, dtrsen, dtrsna, dtrsyl,
|
dtrcon, dtrevc, dtrexc, dtrrfs, dtrsen, dtrsna, dtrsyl,
|
||||||
dtrtrs, dtzrqf, dtzrzf, dstemr,
|
dtrtrs, dtzrzf, dstemr,
|
||||||
dsgesv, dsposv, dlag2s, slag2d, dlat2s,
|
dsgesv, dsposv, dlag2s, slag2d, dlat2s,
|
||||||
dlansf, dpftrf, dpftri, dpftrs, dsfrk, dtfsm, dtftri, dtfttp,
|
dlansf, dpftrf, dpftri, dpftrs, dsfrk, dtfsm, dtftri, dtfttp,
|
||||||
dtfttr, dtpttf, dtpttr, dtrttf, dtrttp,
|
dtfttr, dtpttf, dtpttr, dtrttf, dtrttp,
|
||||||
|
@ -387,14 +386,13 @@
|
||||||
zbdsqr, zgbbrd, zgbcon, zgbequ, zgbrfs, zgbsv, zgbsvx,
|
zbdsqr, zgbbrd, zgbcon, zgbequ, zgbrfs, zgbsv, zgbsvx,
|
||||||
zgbtf2, zgbtrf, zgbtrs, zgebak, zgebal, zgebd2, zgebrd,
|
zgbtf2, zgbtrf, zgbtrs, zgebak, zgebal, zgebd2, zgebrd,
|
||||||
zgecon, zgeequ, zgees, zgeesx, zgeev, zgeevx,
|
zgecon, zgeequ, zgees, zgeesx, zgeev, zgeevx,
|
||||||
zgegs, zgegv, zgehd2, zgehrd, zgelq2, zgelqf,
|
zgehd2, zgehrd, zgelq2, zgelqf,
|
||||||
zgels, zgelsd, zgelss, zgelsx, zgelsy, zgeql2, zgeqlf, zgeqp3,
|
zgels, zgelsd, zgelss, zgelsy, zgeql2, zgeqlf, zgeqp3,
|
||||||
zgeqpf, zgeqr2, zgeqr2p, zgeqrf, zgeqrfp, zgerfs, zgerq2, zgerqf,
|
zgeqr2, zgeqr2p, zgeqrf, zgeqrfp, zgerfs, zgerq2, zgerqf,
|
||||||
zgesc2, zgesdd, zgesvd, zgesvx, zgetc2,
|
zgesc2, zgesdd, zgesvd, zgesvx, zgetc2,
|
||||||
zgetri,
|
zgetri,
|
||||||
zggbak, zggbal, zgges, zggesx, zggev, zggevx, zggglm,
|
zggbak, zggbal, zgges, zggesx, zggev, zggevx, zggglm,
|
||||||
zgghrd, zgglse, zggqrf, zggrqf,
|
zgghrd, zgglse, zggqrf, zggrqf,
|
||||||
zggsvd, zggsvp,
|
|
||||||
zgtcon, zgtrfs, zgtsv, zgtsvx, zgttrf, zgttrs, zgtts2, zhbev,
|
zgtcon, zgtrfs, zgtsv, zgtsvx, zgttrf, zgttrs, zgtts2, zhbev,
|
||||||
zhbevd, zhbevx, zhbgst, zhbgv, zhbgvd, zhbgvx, zhbtrd,
|
zhbevd, zhbevx, zhbgst, zhbgv, zhbgvd, zhbgvx, zhbtrd,
|
||||||
zhecon, zheev, zheevd, zheevr, zheevx, zhegs2, zhegst,
|
zhecon, zheev, zheevd, zheevr, zheevx, zhegs2, zhegst,
|
||||||
|
@ -409,7 +407,7 @@
|
||||||
zlaed0, zlaed7, zlaed8,
|
zlaed0, zlaed7, zlaed8,
|
||||||
zlaein, zlaesy, zlaev2, zlags2, zlagtm,
|
zlaein, zlaesy, zlaev2, zlags2, zlagtm,
|
||||||
zlahef, zlahqr,
|
zlahef, zlahqr,
|
||||||
zlahrd, zlahr2, zlaic1, zlals0, zlalsa, zlalsd, zlangb, zlange,
|
zlahr2, zlaic1, zlals0, zlalsa, zlalsd, zlangb, zlange,
|
||||||
zlangt, zlanhb,
|
zlangt, zlanhb,
|
||||||
zlanhe,
|
zlanhe,
|
||||||
zlanhp, zlanhs, zlanht, zlansb, zlansp, zlansy, zlantb,
|
zlanhp, zlanhs, zlanht, zlansb, zlansp, zlansy, zlantb,
|
||||||
|
@ -422,7 +420,7 @@
|
||||||
zlarfx, zlargv, zlarnv, zlarrv, zlartg, zlartv,
|
zlarfx, zlargv, zlarnv, zlarrv, zlartg, zlartv,
|
||||||
zlarz, zlarzb, zlarzt, zlascl, zlaset, zlasr,
|
zlarz, zlarzb, zlarzt, zlascl, zlaset, zlasr,
|
||||||
zlassq, zlasyf,
|
zlassq, zlasyf,
|
||||||
zlatbs, zlatdf, zlatps, zlatrd, zlatrs, zlatrz, zlatzm,
|
zlatbs, zlatdf, zlatps, zlatrd, zlatrs, zlatrz,
|
||||||
zpbcon, zpbequ, zpbrfs, zpbstf, zpbsv,
|
zpbcon, zpbequ, zpbrfs, zpbstf, zpbsv,
|
||||||
zpbsvx, zpbtf2, zpbtrf, zpbtrs, zpocon, zpoequ, zporfs,
|
zpbsvx, zpbtf2, zpbtrf, zpbtrs, zpocon, zpoequ, zporfs,
|
||||||
zposv, zposvx, zpotrs, zpstrf, zpstf2,
|
zposv, zposvx, zpotrs, zpstrf, zpstf2,
|
||||||
|
@ -438,7 +436,7 @@
|
||||||
ztgexc, ztgsen, ztgsja, ztgsna, ztgsy2, ztgsyl, ztpcon,
|
ztgexc, ztgsen, ztgsja, ztgsna, ztgsy2, ztgsyl, ztpcon,
|
||||||
ztprfs, ztptri,
|
ztprfs, ztptri,
|
||||||
ztptrs, ztrcon, ztrevc, ztrexc, ztrrfs, ztrsen, ztrsna,
|
ztptrs, ztrcon, ztrevc, ztrexc, ztrrfs, ztrsen, ztrsna,
|
||||||
ztrsyl, ztrtrs, ztzrqf, ztzrzf, zung2l,
|
ztrsyl, ztrtrs, ztzrzf, zung2l,
|
||||||
zung2r, zungbr, zunghr, zungl2, zunglq, zungql, zungqr, zungr2,
|
zung2r, zungbr, zunghr, zungl2, zunglq, zungql, zungqr, zungr2,
|
||||||
zungrq, zungtr, zunm2l, zunm2r, zunmbr, zunmhr, zunml2,
|
zungrq, zungtr, zunm2l, zunm2r, zunmbr, zunmhr, zunml2,
|
||||||
zunmlq, zunmql, zunmqr, zunmr2, zunmr3, zunmrq, zunmrz,
|
zunmlq, zunmql, zunmqr, zunmr2, zunmr3, zunmrq, zunmrz,
|
||||||
|
@ -452,6 +450,139 @@
|
||||||
zunbdb5, zunbdb6, zuncsd, zuncsd2by1,
|
zunbdb5, zunbdb6, zuncsd, zuncsd2by1,
|
||||||
zgeqrt, zgeqrt2, zgeqrt3, zgemqrt,
|
zgeqrt, zgeqrt2, zgeqrt3, zgemqrt,
|
||||||
ztpqrt, ztpqrt2, ztpmqrt, ztprfb,
|
ztpqrt, ztpqrt2, ztpmqrt, ztprfb,
|
||||||
|
# functions added for lapack-3.6.0
|
||||||
|
|
||||||
|
cgejsv,
|
||||||
|
cgesvdx,
|
||||||
|
cgesvj,
|
||||||
|
cgetrf2,
|
||||||
|
cgges3,
|
||||||
|
cggev3,
|
||||||
|
cgghd3,
|
||||||
|
cggsvd3,
|
||||||
|
cggsvp3,
|
||||||
|
cgsvj0,
|
||||||
|
cgsvj1,
|
||||||
|
clagge,
|
||||||
|
claghe,
|
||||||
|
clagsy,
|
||||||
|
clahilb,
|
||||||
|
clakf2,
|
||||||
|
clarge,
|
||||||
|
clarnd,
|
||||||
|
claror,
|
||||||
|
clarot,
|
||||||
|
clatm1,
|
||||||
|
clatm2,
|
||||||
|
clatm3,
|
||||||
|
clatm5,
|
||||||
|
clatm6,
|
||||||
|
clatme,
|
||||||
|
clatmr,
|
||||||
|
clatms,
|
||||||
|
clatmt,
|
||||||
|
cpotrf2,
|
||||||
|
csbmv,
|
||||||
|
cspr2,
|
||||||
|
csyr2,
|
||||||
|
cunm22,
|
||||||
|
dbdsvdx,
|
||||||
|
dgesvdx,
|
||||||
|
dgetrf2,
|
||||||
|
dgges3,
|
||||||
|
dggev3,
|
||||||
|
dgghd3,
|
||||||
|
dggsvd3,
|
||||||
|
dggsvp3,
|
||||||
|
dladiv2,
|
||||||
|
dlagge,
|
||||||
|
dlagsy,
|
||||||
|
dlahilb,
|
||||||
|
dlakf2,
|
||||||
|
dlaran,
|
||||||
|
dlarge,
|
||||||
|
dlarnd,
|
||||||
|
dlaror,
|
||||||
|
dlarot,
|
||||||
|
dlatm1,
|
||||||
|
dlatm2,
|
||||||
|
dlatm3,
|
||||||
|
dlatm5,
|
||||||
|
dlatm6,
|
||||||
|
dlatm7,
|
||||||
|
dlatme,
|
||||||
|
dlatmr,
|
||||||
|
dlatms,
|
||||||
|
dlatmt,
|
||||||
|
dorm22,
|
||||||
|
dpotrf2,
|
||||||
|
dsecnd,
|
||||||
|
sbdsvdx,
|
||||||
|
second,
|
||||||
|
sgesvdx,
|
||||||
|
sgetrf2,
|
||||||
|
sgges3,
|
||||||
|
sggev3,
|
||||||
|
sgghd3,
|
||||||
|
sggsvd3,
|
||||||
|
sggsvp3,
|
||||||
|
sladiv2,
|
||||||
|
slagge,
|
||||||
|
slagsy,
|
||||||
|
slahilb,
|
||||||
|
slakf2,
|
||||||
|
slaran,
|
||||||
|
slarge,
|
||||||
|
slarnd,
|
||||||
|
slaror,
|
||||||
|
slarot,
|
||||||
|
slatm1,
|
||||||
|
slatm2,
|
||||||
|
slatm3,
|
||||||
|
slatm5,
|
||||||
|
slatm6,
|
||||||
|
slatm7,
|
||||||
|
slatme,
|
||||||
|
slatmr,
|
||||||
|
slatms,
|
||||||
|
slatmt,
|
||||||
|
sorm22,
|
||||||
|
spotrf2,
|
||||||
|
zgejsv,
|
||||||
|
zgesvdx,
|
||||||
|
zgesvj,
|
||||||
|
zgetrf2,
|
||||||
|
zgges3,
|
||||||
|
zggev3,
|
||||||
|
zgghd3,
|
||||||
|
zggsvd3,
|
||||||
|
zggsvp3,
|
||||||
|
zgsvj0,
|
||||||
|
zgsvj1,
|
||||||
|
zlagge,
|
||||||
|
zlaghe,
|
||||||
|
zlagsy,
|
||||||
|
zlahilb,
|
||||||
|
zlakf2,
|
||||||
|
zlarge,
|
||||||
|
zlarnd,
|
||||||
|
zlaror,
|
||||||
|
zlarot,
|
||||||
|
zlatm1,
|
||||||
|
zlatm2,
|
||||||
|
zlatm3,
|
||||||
|
zlatm5,
|
||||||
|
zlatm6,
|
||||||
|
zlatme,
|
||||||
|
zlatmr,
|
||||||
|
zlatms,
|
||||||
|
zlatmt,
|
||||||
|
zpotrf2,
|
||||||
|
zsbmv,
|
||||||
|
zspr2,
|
||||||
|
zsyr2,
|
||||||
|
zunm22
|
||||||
|
|
||||||
);
|
);
|
||||||
|
|
||||||
@lapack_extendedprecision_objs = (
|
@lapack_extendedprecision_objs = (
|
||||||
|
@ -459,6 +590,13 @@
|
||||||
dlagsy, dsysvxx, sporfsx, slatms, zlatms, zherfsx, csysvxx,
|
dlagsy, dsysvxx, sporfsx, slatms, zlatms, zherfsx, csysvxx,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@lapack_deprecated_objs = (
|
||||||
|
cgegs, cggsvd, ctzrqf, dgeqpf, dlatzm, sgelsx, slahrd, zgegv, zggsvp,
|
||||||
|
cgegv, cggsvp, dgegs, dggsvd, dtzrqf, sgeqpf, slatzm, zgelsx, zlahrd,
|
||||||
|
cgelsx, clahrd, dgegv, dggsvp, sgegs, sggsvd, stzrqf, zgeqpf, zlatzm,
|
||||||
|
cgeqpf, clatzm, dgelsx, dlahrd, sgegv, sggsvp, zgegs, zggsvd, ztzrqf,
|
||||||
|
);
|
||||||
|
|
||||||
@lapackeobjs = (
|
@lapackeobjs = (
|
||||||
# LAPACK C interface routines.
|
# LAPACK C interface routines.
|
||||||
#
|
#
|
||||||
|
@ -682,8 +820,6 @@
|
||||||
LAPACKE_cgeqlf_work,
|
LAPACKE_cgeqlf_work,
|
||||||
LAPACKE_cgeqp3,
|
LAPACKE_cgeqp3,
|
||||||
LAPACKE_cgeqp3_work,
|
LAPACKE_cgeqp3_work,
|
||||||
LAPACKE_cgeqpf,
|
|
||||||
LAPACKE_cgeqpf_work,
|
|
||||||
LAPACKE_cgeqr2,
|
LAPACKE_cgeqr2,
|
||||||
LAPACKE_cgeqr2_work,
|
LAPACKE_cgeqr2_work,
|
||||||
LAPACKE_cgeqrf,
|
LAPACKE_cgeqrf,
|
||||||
|
@ -738,10 +874,6 @@
|
||||||
LAPACKE_cggqrf_work,
|
LAPACKE_cggqrf_work,
|
||||||
LAPACKE_cggrqf,
|
LAPACKE_cggrqf,
|
||||||
LAPACKE_cggrqf_work,
|
LAPACKE_cggrqf_work,
|
||||||
LAPACKE_cggsvd,
|
|
||||||
LAPACKE_cggsvd_work,
|
|
||||||
LAPACKE_cggsvp,
|
|
||||||
LAPACKE_cggsvp_work,
|
|
||||||
LAPACKE_cgtcon,
|
LAPACKE_cgtcon,
|
||||||
LAPACKE_cgtcon_work,
|
LAPACKE_cgtcon_work,
|
||||||
LAPACKE_cgtrfs,
|
LAPACKE_cgtrfs,
|
||||||
|
@ -1186,8 +1318,6 @@
|
||||||
LAPACKE_dgeqlf_work,
|
LAPACKE_dgeqlf_work,
|
||||||
LAPACKE_dgeqp3,
|
LAPACKE_dgeqp3,
|
||||||
LAPACKE_dgeqp3_work,
|
LAPACKE_dgeqp3_work,
|
||||||
LAPACKE_dgeqpf,
|
|
||||||
LAPACKE_dgeqpf_work,
|
|
||||||
LAPACKE_dgeqr2,
|
LAPACKE_dgeqr2,
|
||||||
LAPACKE_dgeqr2_work,
|
LAPACKE_dgeqr2_work,
|
||||||
LAPACKE_dgeqrf,
|
LAPACKE_dgeqrf,
|
||||||
|
@ -1244,10 +1374,6 @@
|
||||||
LAPACKE_dggqrf_work,
|
LAPACKE_dggqrf_work,
|
||||||
LAPACKE_dggrqf,
|
LAPACKE_dggrqf,
|
||||||
LAPACKE_dggrqf_work,
|
LAPACKE_dggrqf_work,
|
||||||
LAPACKE_dggsvd,
|
|
||||||
LAPACKE_dggsvd_work,
|
|
||||||
LAPACKE_dggsvp,
|
|
||||||
LAPACKE_dggsvp_work,
|
|
||||||
LAPACKE_dgtcon,
|
LAPACKE_dgtcon,
|
||||||
LAPACKE_dgtcon_work,
|
LAPACKE_dgtcon_work,
|
||||||
LAPACKE_dgtrfs,
|
LAPACKE_dgtrfs,
|
||||||
|
@ -1676,8 +1802,6 @@
|
||||||
LAPACKE_sgeqlf_work,
|
LAPACKE_sgeqlf_work,
|
||||||
LAPACKE_sgeqp3,
|
LAPACKE_sgeqp3,
|
||||||
LAPACKE_sgeqp3_work,
|
LAPACKE_sgeqp3_work,
|
||||||
LAPACKE_sgeqpf,
|
|
||||||
LAPACKE_sgeqpf_work,
|
|
||||||
LAPACKE_sgeqr2,
|
LAPACKE_sgeqr2,
|
||||||
LAPACKE_sgeqr2_work,
|
LAPACKE_sgeqr2_work,
|
||||||
LAPACKE_sgeqrf,
|
LAPACKE_sgeqrf,
|
||||||
|
@ -1734,10 +1858,6 @@
|
||||||
LAPACKE_sggqrf_work,
|
LAPACKE_sggqrf_work,
|
||||||
LAPACKE_sggrqf,
|
LAPACKE_sggrqf,
|
||||||
LAPACKE_sggrqf_work,
|
LAPACKE_sggrqf_work,
|
||||||
LAPACKE_sggsvd,
|
|
||||||
LAPACKE_sggsvd_work,
|
|
||||||
LAPACKE_sggsvp,
|
|
||||||
LAPACKE_sggsvp_work,
|
|
||||||
LAPACKE_sgtcon,
|
LAPACKE_sgtcon,
|
||||||
LAPACKE_sgtcon_work,
|
LAPACKE_sgtcon_work,
|
||||||
LAPACKE_sgtrfs,
|
LAPACKE_sgtrfs,
|
||||||
|
@ -2158,8 +2278,6 @@
|
||||||
LAPACKE_zgeqlf_work,
|
LAPACKE_zgeqlf_work,
|
||||||
LAPACKE_zgeqp3,
|
LAPACKE_zgeqp3,
|
||||||
LAPACKE_zgeqp3_work,
|
LAPACKE_zgeqp3_work,
|
||||||
LAPACKE_zgeqpf,
|
|
||||||
LAPACKE_zgeqpf_work,
|
|
||||||
LAPACKE_zgeqr2,
|
LAPACKE_zgeqr2,
|
||||||
LAPACKE_zgeqr2_work,
|
LAPACKE_zgeqr2_work,
|
||||||
LAPACKE_zgeqrf,
|
LAPACKE_zgeqrf,
|
||||||
|
@ -2214,10 +2332,6 @@
|
||||||
LAPACKE_zggqrf_work,
|
LAPACKE_zggqrf_work,
|
||||||
LAPACKE_zggrqf,
|
LAPACKE_zggrqf,
|
||||||
LAPACKE_zggrqf_work,
|
LAPACKE_zggrqf_work,
|
||||||
LAPACKE_zggsvd,
|
|
||||||
LAPACKE_zggsvd_work,
|
|
||||||
LAPACKE_zggsvp,
|
|
||||||
LAPACKE_zggsvp_work,
|
|
||||||
LAPACKE_zgtcon,
|
LAPACKE_zgtcon,
|
||||||
LAPACKE_zgtcon_work,
|
LAPACKE_zgtcon_work,
|
||||||
LAPACKE_zgtrfs,
|
LAPACKE_zgtrfs,
|
||||||
|
@ -2707,6 +2821,134 @@
|
||||||
LAPACKE_slagsy_work,
|
LAPACKE_slagsy_work,
|
||||||
LAPACKE_zlagsy,
|
LAPACKE_zlagsy,
|
||||||
LAPACKE_zlagsy_work,
|
LAPACKE_zlagsy_work,
|
||||||
|
## new function from lapack-3.6.0
|
||||||
|
|
||||||
|
LAPACKE_cgejsv,
|
||||||
|
LAPACKE_cgejsv_work,
|
||||||
|
LAPACKE_cgesvdx,
|
||||||
|
LAPACKE_cgesvdx_work,
|
||||||
|
LAPACKE_cgesvj,
|
||||||
|
LAPACKE_cgesvj_work,
|
||||||
|
LAPACKE_cgetrf2,
|
||||||
|
LAPACKE_cgetrf2_work,
|
||||||
|
LAPACKE_cgges3,
|
||||||
|
LAPACKE_cgges3_work,
|
||||||
|
LAPACKE_cggev3,
|
||||||
|
LAPACKE_cggev3_work,
|
||||||
|
LAPACKE_cgghd3,
|
||||||
|
LAPACKE_cgghd3_work,
|
||||||
|
LAPACKE_cggsvd3,
|
||||||
|
LAPACKE_cggsvd3_work,
|
||||||
|
LAPACKE_cggsvp3,
|
||||||
|
LAPACKE_cggsvp3_work,
|
||||||
|
LAPACKE_chetrf_rook,
|
||||||
|
LAPACKE_chetrf_rook_work,
|
||||||
|
LAPACKE_chetrs_rook,
|
||||||
|
LAPACKE_chetrs_rook_work,
|
||||||
|
LAPACKE_clapmt,
|
||||||
|
LAPACKE_clapmt_work,
|
||||||
|
LAPACKE_clascl,
|
||||||
|
LAPACKE_clascl_work,
|
||||||
|
LAPACKE_cpotrf2,
|
||||||
|
LAPACKE_cpotrf2_work,
|
||||||
|
LAPACKE_csytrf_rook,
|
||||||
|
LAPACKE_csytrf_rook_work,
|
||||||
|
LAPACKE_csytrs_rook,
|
||||||
|
LAPACKE_csytrs_rook_work,
|
||||||
|
LAPACKE_cuncsd2by1,
|
||||||
|
LAPACKE_cuncsd2by1_work,
|
||||||
|
LAPACKE_dbdsvdx,
|
||||||
|
LAPACKE_dbdsvdx_work,
|
||||||
|
LAPACKE_dgesvdx,
|
||||||
|
LAPACKE_dgesvdx_work,
|
||||||
|
LAPACKE_dgetrf2,
|
||||||
|
LAPACKE_dgetrf2_work,
|
||||||
|
LAPACKE_dgges3,
|
||||||
|
LAPACKE_dgges3_work,
|
||||||
|
LAPACKE_dggev3,
|
||||||
|
LAPACKE_dggev3_work,
|
||||||
|
LAPACKE_dgghd3,
|
||||||
|
LAPACKE_dgghd3_work,
|
||||||
|
LAPACKE_dggsvd3,
|
||||||
|
LAPACKE_dggsvd3_work,
|
||||||
|
LAPACKE_dggsvp3,
|
||||||
|
LAPACKE_dggsvp3_work,
|
||||||
|
LAPACKE_dlapmt,
|
||||||
|
LAPACKE_dlapmt_work,
|
||||||
|
LAPACKE_dlascl,
|
||||||
|
LAPACKE_dlascl_work,
|
||||||
|
LAPACKE_dorcsd2by1,
|
||||||
|
LAPACKE_dorcsd2by1_work,
|
||||||
|
LAPACKE_dpotrf2,
|
||||||
|
LAPACKE_dpotrf2_work,
|
||||||
|
LAPACKE_dsytrf_rook,
|
||||||
|
LAPACKE_dsytrf_rook_work,
|
||||||
|
LAPACKE_dsytrs_rook,
|
||||||
|
LAPACKE_dsytrs_rook_work,
|
||||||
|
LAPACKE_sbdsvdx,
|
||||||
|
LAPACKE_sbdsvdx_work,
|
||||||
|
LAPACKE_sgesvdx,
|
||||||
|
LAPACKE_sgesvdx_work,
|
||||||
|
LAPACKE_sgetrf2,
|
||||||
|
LAPACKE_sgetrf2_work,
|
||||||
|
LAPACKE_sgges3,
|
||||||
|
LAPACKE_sgges3_work,
|
||||||
|
LAPACKE_sggev3,
|
||||||
|
LAPACKE_sggev3_work,
|
||||||
|
LAPACKE_sgghd3,
|
||||||
|
LAPACKE_sgghd3_work,
|
||||||
|
LAPACKE_sggsvd3,
|
||||||
|
LAPACKE_sggsvd3_work,
|
||||||
|
LAPACKE_sggsvp3,
|
||||||
|
LAPACKE_sggsvp3_work,
|
||||||
|
LAPACKE_slapmt,
|
||||||
|
LAPACKE_slapmt_work,
|
||||||
|
LAPACKE_slascl,
|
||||||
|
LAPACKE_slascl_work,
|
||||||
|
LAPACKE_sorcsd2by1,
|
||||||
|
LAPACKE_sorcsd2by1_work,
|
||||||
|
LAPACKE_spotrf2,
|
||||||
|
LAPACKE_spotrf2_work,
|
||||||
|
LAPACKE_ssytrf_rook,
|
||||||
|
LAPACKE_ssytrf_rook_work,
|
||||||
|
LAPACKE_ssytrs_rook,
|
||||||
|
LAPACKE_ssytrs_rook_work,
|
||||||
|
LAPACKE_stpqrt,
|
||||||
|
LAPACKE_stpqrt_work,
|
||||||
|
LAPACKE_zgejsv,
|
||||||
|
LAPACKE_zgejsv_work,
|
||||||
|
LAPACKE_zgesvdx,
|
||||||
|
LAPACKE_zgesvdx_work,
|
||||||
|
LAPACKE_zgesvj,
|
||||||
|
LAPACKE_zgesvj_work,
|
||||||
|
LAPACKE_zgetrf2,
|
||||||
|
LAPACKE_zgetrf2_work,
|
||||||
|
LAPACKE_zgges3,
|
||||||
|
LAPACKE_zgges3_work,
|
||||||
|
LAPACKE_zggev3,
|
||||||
|
LAPACKE_zggev3_work,
|
||||||
|
LAPACKE_zgghd3,
|
||||||
|
LAPACKE_zgghd3_work,
|
||||||
|
LAPACKE_zggsvd3,
|
||||||
|
LAPACKE_zggsvd3_work,
|
||||||
|
LAPACKE_zggsvp3,
|
||||||
|
LAPACKE_zggsvp3_work,
|
||||||
|
LAPACKE_zhetrf_rook,
|
||||||
|
LAPACKE_zhetrf_rook_work,
|
||||||
|
LAPACKE_zhetrs_rook,
|
||||||
|
LAPACKE_zhetrs_rook_work,
|
||||||
|
LAPACKE_zlapmt,
|
||||||
|
LAPACKE_zlapmt_work,
|
||||||
|
LAPACKE_zlascl,
|
||||||
|
LAPACKE_zlascl_work,
|
||||||
|
LAPACKE_zpotrf2,
|
||||||
|
LAPACKE_zpotrf2_work,
|
||||||
|
LAPACKE_zsytrf_rook,
|
||||||
|
LAPACKE_zsytrf_rook_work,
|
||||||
|
LAPACKE_zsytrs_rook,
|
||||||
|
LAPACKE_zsytrs_rook_work,
|
||||||
|
LAPACKE_zuncsd2by1,
|
||||||
|
LAPACKE_zuncsd2by1_work
|
||||||
);
|
);
|
||||||
|
|
||||||
#These function may need 2 underscores.
|
#These function may need 2 underscores.
|
||||||
|
@ -2749,6 +2991,11 @@ if ($ARGV[8] == 1) {
|
||||||
@need_2underscore_objs = (@lapack_embeded_underscore_objs);
|
@need_2underscore_objs = (@lapack_embeded_underscore_objs);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if ($ARGV[11] == 1){
|
||||||
|
#BUILD_LAPACK_DEPRECATED=1
|
||||||
|
@underscore_objs =(@underscore_objs, @lapack_deprecated_objs);
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
@underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs);
|
@underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs);
|
||||||
}
|
}
|
||||||
|
|
5
f_check
5
f_check
|
@ -1,5 +1,7 @@
|
||||||
#!/usr/bin/perl
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
||||||
|
|
||||||
#
|
#
|
||||||
# 1. Not specified
|
# 1. Not specified
|
||||||
# 1.1 Automatically detect, then check compiler
|
# 1.1 Automatically detect, then check compiler
|
||||||
|
@ -272,8 +274,9 @@ if ($link ne "") {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($flags =~ /^\-Y/) {
|
if ($flags =~ /^\-Y/) {
|
||||||
|
next if ($hostos eq 'SunOS');
|
||||||
$linker_L .= "-Wl,". $flags . " ";
|
$linker_L .= "-Wl,". $flags . " ";
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($flags =~ /^\-rpath\@/) {
|
if ($flags =~ /^\-rpath\@/) {
|
||||||
$flags =~ s/\@/\,/g;
|
$flags =~ s/\@/\,/g;
|
||||||
|
|
42
getarch.c
42
getarch.c
|
@ -86,7 +86,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/sysctl.h>
|
#include <sys/sysctl.h>
|
||||||
#endif
|
#endif
|
||||||
#ifdef linux
|
#if defined(linux) || defined(__sun__)
|
||||||
#include <sys/sysinfo.h>
|
#include <sys/sysinfo.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#endif
|
#endif
|
||||||
|
@ -552,7 +552,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define CORENAME "POWER5"
|
#define CORENAME "POWER5"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(FORCE_POWER6) || defined(FORCE_POWER7) || defined(FORCE_POWER8)
|
#if defined(FORCE_POWER6) || defined(FORCE_POWER7)
|
||||||
#define FORCE
|
#define FORCE
|
||||||
#define ARCHITECTURE "POWER"
|
#define ARCHITECTURE "POWER"
|
||||||
#define SUBARCHITECTURE "POWER6"
|
#define SUBARCHITECTURE "POWER6"
|
||||||
|
@ -565,6 +565,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define CORENAME "POWER6"
|
#define CORENAME "POWER6"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(FORCE_POWER8)
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "POWER"
|
||||||
|
#define SUBARCHITECTURE "POWER8"
|
||||||
|
#define SUBDIRNAME "power"
|
||||||
|
#define ARCHCONFIG "-DPOWER8 " \
|
||||||
|
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
|
||||||
|
"-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||||
|
#define LIBNAME "power8"
|
||||||
|
#define CORENAME "POWER8"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef FORCE_PPCG4
|
#ifdef FORCE_PPCG4
|
||||||
#define FORCE
|
#define FORCE
|
||||||
#define ARCHITECTURE "POWER"
|
#define ARCHITECTURE "POWER"
|
||||||
|
@ -819,10 +833,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 "
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 "
|
||||||
#define LIBNAME "armv8"
|
#define LIBNAME "armv8"
|
||||||
#define CORENAME "XGENE1"
|
#define CORENAME "ARMV8"
|
||||||
#else
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_CORTEXA57
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "ARM64"
|
||||||
|
#define SUBARCHITECTURE "ARMV8"
|
||||||
|
#define SUBDIRNAME "arm64"
|
||||||
|
#define ARCHCONFIG "-DCORTEXA57 " \
|
||||||
|
"-DL1_CODE_SIZE=49152 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \
|
||||||
|
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \
|
||||||
|
"-DL2_SIZE=2097152 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||||
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON"
|
||||||
|
#define LIBNAME "cortexa57"
|
||||||
|
#define CORENAME "CORTEXA57"
|
||||||
|
#else
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef FORCE
|
#ifndef FORCE
|
||||||
|
|
||||||
|
@ -892,7 +920,7 @@ static int get_num_cores(void) {
|
||||||
size_t len;
|
size_t len;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef linux
|
#if defined(linux) || defined(__sun__)
|
||||||
//returns the number of processors which are currently online
|
//returns the number of processors which are currently online
|
||||||
return sysconf(_SC_NPROCESSORS_ONLN);
|
return sysconf(_SC_NPROCESSORS_ONLN);
|
||||||
|
|
||||||
|
@ -984,7 +1012,9 @@ int main(int argc, char *argv[]){
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if NO_PARALLEL_MAKE==1
|
#ifdef MAKE_NB_JOBS
|
||||||
|
printf("MAKE += -j %d\n", MAKE_NB_JOBS);
|
||||||
|
#elif NO_PARALLEL_MAKE==1
|
||||||
printf("MAKE += -j 1\n");
|
printf("MAKE += -j 1\n");
|
||||||
#else
|
#else
|
||||||
#ifndef OS_WINDOWS
|
#ifndef OS_WINDOWS
|
||||||
|
|
|
@ -79,11 +79,9 @@ void NAME(char *TRANS, blasint *M, blasint *N,
|
||||||
FLOAT alpha = *ALPHA;
|
FLOAT alpha = *ALPHA;
|
||||||
FLOAT beta = *BETA;
|
FLOAT beta = *BETA;
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
|
int buffer_size;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
int nthreads;
|
int nthreads;
|
||||||
int nthreads_max;
|
|
||||||
int nthreads_avail;
|
|
||||||
double MNK;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
|
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
|
||||||
|
@ -134,13 +132,10 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
blasint lenx, leny;
|
blasint lenx, leny;
|
||||||
int trans;
|
int trans, buffer_size;
|
||||||
blasint info, t;
|
blasint info, t;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
int nthreads;
|
int nthreads;
|
||||||
int nthreads_max;
|
|
||||||
int nthreads_avail;
|
|
||||||
double MNK;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
|
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = {
|
||||||
|
@ -215,43 +210,20 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
if (incx < 0) x -= (lenx - 1) * incx;
|
if (incx < 0) x -= (lenx - 1) * incx;
|
||||||
if (incy < 0) y -= (leny - 1) * incy;
|
if (incy < 0) y -= (leny - 1) * incy;
|
||||||
|
|
||||||
#ifdef MAX_STACK_ALLOC
|
buffer_size = m + n + 128 / sizeof(FLOAT);
|
||||||
// make it volatile because some gemv implementation (ex: dgemv_n.S)
|
#ifdef WINDOWS_ABI
|
||||||
// do not restore all register
|
buffer_size += 160 / sizeof(FLOAT) ;
|
||||||
volatile int stack_alloc_size = 0;
|
|
||||||
//for gemv_n and gemv_t, try to allocate on stack
|
|
||||||
stack_alloc_size = m + n;
|
|
||||||
#ifdef ALIGNED_ACCESS
|
|
||||||
stack_alloc_size += 3;
|
|
||||||
#endif
|
|
||||||
if(stack_alloc_size < 128)
|
|
||||||
//dgemv_n.S require a 128 bytes buffer
|
|
||||||
stack_alloc_size = 128;
|
|
||||||
|
|
||||||
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
|
|
||||||
stack_alloc_size = 0;
|
|
||||||
|
|
||||||
FLOAT stack_buffer[stack_alloc_size];
|
|
||||||
buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
|
|
||||||
// printf("stack_alloc_size=%d\n", stack_alloc_size);
|
|
||||||
#else
|
|
||||||
//Original OpenBLAS/GotoBLAS codes.
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
|
||||||
#endif
|
#endif
|
||||||
|
// for alignment
|
||||||
|
buffer_size = (buffer_size + 3) & ~3;
|
||||||
|
STACK_ALLOC(buffer_size, FLOAT, buffer);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
|
|
||||||
nthreads_max = num_cpu_avail(2);
|
if ( 1L * m * n < 2304L * GEMM_MULTITHREAD_THRESHOLD )
|
||||||
nthreads_avail = nthreads_max;
|
nthreads = 1;
|
||||||
|
|
||||||
MNK = (double) m * (double) n;
|
|
||||||
if ( MNK <= (24.0 * 24.0 * (double) (GEMM_MULTITHREAD_THRESHOLD*GEMM_MULTITHREAD_THRESHOLD) ) )
|
|
||||||
nthreads_max = 1;
|
|
||||||
|
|
||||||
if ( nthreads_max > nthreads_avail )
|
|
||||||
nthreads = nthreads_avail;
|
|
||||||
else
|
else
|
||||||
nthreads = nthreads_max;
|
nthreads = num_cpu_avail(2);
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
@ -266,14 +238,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef MAX_STACK_ALLOC
|
STACK_FREE(buffer);
|
||||||
if(!stack_alloc_size){
|
|
||||||
blas_memory_free(buffer);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
blas_memory_free(buffer);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
|
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
|
||||||
|
|
||||||
IDEBUG_END;
|
IDEBUG_END;
|
||||||
|
|
|
@ -171,19 +171,14 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
if (incy < 0) y -= (n - 1) * incy;
|
if (incy < 0) y -= (n - 1) * incy;
|
||||||
if (incx < 0) x -= (m - 1) * incx;
|
if (incx < 0) x -= (m - 1) * incx;
|
||||||
|
|
||||||
#ifdef MAX_STACK_ALLOC
|
STACK_ALLOC(m, FLOAT, buffer);
|
||||||
volatile int stack_alloc_size = m;
|
|
||||||
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
|
|
||||||
stack_alloc_size = 0;
|
|
||||||
FLOAT stack_buffer[stack_alloc_size];
|
|
||||||
buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
|
|
||||||
#else
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SMPTEST
|
#ifdef SMPTEST
|
||||||
nthreads = num_cpu_avail(2);
|
// Threshold chosen so that speed-up is > 1 on a Xeon E5-2630
|
||||||
|
if(1L * m * n > 2048L * GEMM_MULTITHREAD_THRESHOLD)
|
||||||
|
nthreads = num_cpu_avail(2);
|
||||||
|
else
|
||||||
|
nthreads = 1;
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
@ -198,11 +193,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef MAX_STACK_ALLOC
|
STACK_FREE(buffer);
|
||||||
if(!stack_alloc_size)
|
|
||||||
#endif
|
|
||||||
blas_memory_free(buffer);
|
|
||||||
|
|
||||||
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
|
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
|
||||||
|
|
||||||
IDEBUG_END;
|
IDEBUG_END;
|
||||||
|
|
|
@ -95,7 +95,7 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
|
||||||
s = db / r;
|
s = db / r;
|
||||||
z = ONE;
|
z = ONE;
|
||||||
if (ada > adb) z = s;
|
if (ada > adb) z = s;
|
||||||
if ((ada < adb) && (c != ZERO)) z = ONE / c;
|
if ((ada <= adb) && (c != ZERO)) z = ONE / c;
|
||||||
|
|
||||||
*C = c;
|
*C = c;
|
||||||
*S = s;
|
*S = s;
|
||||||
|
|
|
@ -77,12 +77,13 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
|
||||||
if (incy < 0) y -= (n - 1) * incy;
|
if (incy < 0) y -= (n - 1) * incy;
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
nthreads = num_cpu_avail(1);
|
|
||||||
|
|
||||||
//disable multi-thread when incx==0 or incy==0
|
//disable multi-thread when incx==0 or incy==0
|
||||||
//In that case, the threads would be dependent.
|
//In that case, the threads would be dependent.
|
||||||
if (incx == 0 || incy == 0)
|
if (incx == 0 || incy == 0 || n < 2097152 * GEMM_MULTITHREAD_THRESHOLD / sizeof(FLOAT))
|
||||||
nthreads = 1;
|
nthreads = 1;
|
||||||
|
else
|
||||||
|
nthreads = num_cpu_avail(1);
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -91,6 +91,27 @@
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef SMP
|
||||||
|
#ifndef COMPLEX
|
||||||
|
#ifdef XDOUBLE
|
||||||
|
#define MODE (BLAS_XDOUBLE | BLAS_REAL)
|
||||||
|
#elif defined(DOUBLE)
|
||||||
|
#define MODE (BLAS_DOUBLE | BLAS_REAL)
|
||||||
|
#else
|
||||||
|
#define MODE (BLAS_SINGLE | BLAS_REAL)
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#ifdef XDOUBLE
|
||||||
|
#define MODE (BLAS_XDOUBLE | BLAS_COMPLEX)
|
||||||
|
#elif defined(DOUBLE)
|
||||||
|
#define MODE (BLAS_DOUBLE | BLAS_COMPLEX)
|
||||||
|
#else
|
||||||
|
#define MODE (BLAS_SINGLE | BLAS_COMPLEX)
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
static int (*symm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
|
static int (*symm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
|
||||||
#ifndef GEMM3M
|
#ifndef GEMM3M
|
||||||
#ifndef HEMM
|
#ifndef HEMM
|
||||||
|
@ -135,26 +156,6 @@ void NAME(char *SIDE, char *UPLO,
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
FLOAT *sa, *sb;
|
FLOAT *sa, *sb;
|
||||||
|
|
||||||
#ifdef SMP
|
|
||||||
#ifndef COMPLEX
|
|
||||||
#ifdef XDOUBLE
|
|
||||||
int mode = BLAS_XDOUBLE | BLAS_REAL;
|
|
||||||
#elif defined(DOUBLE)
|
|
||||||
int mode = BLAS_DOUBLE | BLAS_REAL;
|
|
||||||
#else
|
|
||||||
int mode = BLAS_SINGLE | BLAS_REAL;
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#ifdef XDOUBLE
|
|
||||||
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
|
|
||||||
#elif defined(DOUBLE)
|
|
||||||
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
|
|
||||||
#else
|
|
||||||
int mode = BLAS_SINGLE | BLAS_COMPLEX;
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(SMP) && !defined(NO_AFFINITY)
|
#if defined(SMP) && !defined(NO_AFFINITY)
|
||||||
int nodes;
|
int nodes;
|
||||||
#endif
|
#endif
|
||||||
|
@ -246,26 +247,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
FLOAT *sa, *sb;
|
FLOAT *sa, *sb;
|
||||||
|
|
||||||
#ifdef SMP
|
|
||||||
#ifndef COMPLEX
|
|
||||||
#ifdef XDOUBLE
|
|
||||||
int mode = BLAS_XDOUBLE | BLAS_REAL;
|
|
||||||
#elif defined(DOUBLE)
|
|
||||||
int mode = BLAS_DOUBLE | BLAS_REAL;
|
|
||||||
#else
|
|
||||||
int mode = BLAS_SINGLE | BLAS_REAL;
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#ifdef XDOUBLE
|
|
||||||
int mode = BLAS_XDOUBLE | BLAS_COMPLEX;
|
|
||||||
#elif defined(DOUBLE)
|
|
||||||
int mode = BLAS_DOUBLE | BLAS_COMPLEX;
|
|
||||||
#else
|
|
||||||
int mode = BLAS_SINGLE | BLAS_COMPLEX;
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(SMP) && !defined(NO_AFFINITY)
|
#if defined(SMP) && !defined(NO_AFFINITY)
|
||||||
int nodes;
|
int nodes;
|
||||||
#endif
|
#endif
|
||||||
|
@ -407,7 +388,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
|
||||||
|
|
||||||
args.nthreads /= nodes;
|
args.nthreads /= nodes;
|
||||||
|
|
||||||
gemm_thread_mn(mode, &args, NULL, NULL,
|
gemm_thread_mn(MODE, &args, NULL, NULL,
|
||||||
symm[4 | (side << 1) | uplo ], sa, sb, nodes);
|
symm[4 | (side << 1) | uplo ], sa, sb, nodes);
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -419,7 +400,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
GEMM_THREAD(mode, &args, NULL, NULL, symm[(side << 1) | uplo ], sa, sb, args.nthreads);
|
GEMM_THREAD(MODE, &args, NULL, NULL, symm[(side << 1) | uplo ], sa, sb, args.nthreads);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -116,7 +116,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
|
||||||
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *a, blasint lda) {
|
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *a, blasint lda) {
|
||||||
|
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
int trans, uplo;
|
int uplo;
|
||||||
blasint info;
|
blasint info;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
int nthreads;
|
int nthreads;
|
||||||
|
@ -124,7 +124,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha,
|
||||||
|
|
||||||
PRINT_DEBUG_CNAME;
|
PRINT_DEBUG_CNAME;
|
||||||
|
|
||||||
trans = -1;
|
|
||||||
uplo = -1;
|
uplo = -1;
|
||||||
info = 0;
|
info = 0;
|
||||||
|
|
||||||
|
|
|
@ -118,7 +118,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
|
||||||
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT *a, blasint lda) {
|
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint incy, FLOAT *a, blasint lda) {
|
||||||
|
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
int trans, uplo;
|
int uplo;
|
||||||
blasint info;
|
blasint info;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
int nthreads;
|
int nthreads;
|
||||||
|
@ -126,7 +126,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha,
|
||||||
|
|
||||||
PRINT_DEBUG_CNAME;
|
PRINT_DEBUG_CNAME;
|
||||||
|
|
||||||
trans = -1;
|
|
||||||
uplo = -1;
|
uplo = -1;
|
||||||
info = 0;
|
info = 0;
|
||||||
|
|
||||||
|
|
|
@ -77,11 +77,9 @@ void NAME(char *TRANS, blasint *M, blasint *N,
|
||||||
blasint incy = *INCY;
|
blasint incy = *INCY;
|
||||||
|
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
|
int buffer_size;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
int nthreads;
|
int nthreads;
|
||||||
int nthreads_max;
|
|
||||||
int nthreads_avail;
|
|
||||||
double MNK;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
|
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
|
||||||
|
@ -144,13 +142,10 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
blasint lenx, leny;
|
blasint lenx, leny;
|
||||||
int trans;
|
int trans, buffer_size;
|
||||||
blasint info, t;
|
blasint info, t;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
int nthreads;
|
int nthreads;
|
||||||
int nthreads_max;
|
|
||||||
int nthreads_avail;
|
|
||||||
double MNK;
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
|
int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG,
|
||||||
|
@ -236,22 +231,26 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
if (incx < 0) x -= (lenx - 1) * incx * 2;
|
if (incx < 0) x -= (lenx - 1) * incx * 2;
|
||||||
if (incy < 0) y -= (leny - 1) * incy * 2;
|
if (incy < 0) y -= (leny - 1) * incy * 2;
|
||||||
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
buffer_size = 2 * (m + n) + 128 / sizeof(FLOAT);
|
||||||
|
#ifdef WINDOWS_ABI
|
||||||
|
buffer_size += 160 / sizeof(FLOAT) ;
|
||||||
|
#endif
|
||||||
|
// for alignment
|
||||||
|
buffer_size = (buffer_size + 3) & ~3;
|
||||||
|
STACK_ALLOC(buffer_size, FLOAT, buffer);
|
||||||
|
|
||||||
|
#if defined(ARCH_X86_64) && defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
|
||||||
|
// cgemv_t.S return NaN if there are NaN or Inf in the buffer (see bug #746)
|
||||||
|
if(trans && stack_alloc_size)
|
||||||
|
memset(buffer, 0, MIN(BUFFER_SIZE, sizeof(FLOAT) * buffer_size));
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
|
|
||||||
nthreads_max = num_cpu_avail(2);
|
if ( 1L * m * n < 1024L * GEMM_MULTITHREAD_THRESHOLD )
|
||||||
nthreads_avail = nthreads_max;
|
nthreads = 1;
|
||||||
|
|
||||||
MNK = (double) m * (double) n;
|
|
||||||
if ( MNK <= ( 256.0 * (double) (GEMM_MULTITHREAD_THRESHOLD * GEMM_MULTITHREAD_THRESHOLD) ))
|
|
||||||
nthreads_max = 1;
|
|
||||||
|
|
||||||
if ( nthreads_max > nthreads_avail )
|
|
||||||
nthreads = nthreads_avail;
|
|
||||||
else
|
else
|
||||||
nthreads = nthreads_max;
|
nthreads = num_cpu_avail(2);
|
||||||
|
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
@ -267,7 +266,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
blas_memory_free(buffer);
|
STACK_FREE(buffer);
|
||||||
|
|
||||||
FUNCTION_PROFILE_END(4, m * n + m + n, 2 * m * n);
|
FUNCTION_PROFILE_END(4, m * n + m + n, 2 * m * n);
|
||||||
|
|
||||||
|
|
|
@ -210,10 +210,14 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
if (incy < 0) y -= (n - 1) * incy * 2;
|
if (incy < 0) y -= (n - 1) * incy * 2;
|
||||||
if (incx < 0) x -= (m - 1) * incx * 2;
|
if (incx < 0) x -= (m - 1) * incx * 2;
|
||||||
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
STACK_ALLOC(2 * m, FLOAT, buffer);
|
||||||
|
|
||||||
#ifdef SMPTEST
|
#ifdef SMPTEST
|
||||||
nthreads = num_cpu_avail(2);
|
// Threshold chosen so that speed-up is > 1 on a Xeon E5-2630
|
||||||
|
if(1L * m * n > 36L * sizeof(FLOAT) * sizeof(FLOAT) * GEMM_MULTITHREAD_THRESHOLD)
|
||||||
|
nthreads = num_cpu_avail(2);
|
||||||
|
else
|
||||||
|
nthreads = 1;
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
@ -245,7 +249,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
blas_memory_free(buffer);
|
STACK_FREE(buffer);
|
||||||
|
|
||||||
FUNCTION_PROFILE_END(4, m * n + m + n, 2 * m * n);
|
FUNCTION_PROFILE_END(4, m * n + m + n, 2 * m * n);
|
||||||
|
|
||||||
|
|
|
@ -117,7 +117,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT *ALPHA
|
||||||
FLOAT beta_i = BETA[1];
|
FLOAT beta_i = BETA[1];
|
||||||
|
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
int trans, uplo;
|
int uplo;
|
||||||
blasint info;
|
blasint info;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
int nthreads;
|
int nthreads;
|
||||||
|
@ -135,7 +135,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT *ALPHA
|
||||||
|
|
||||||
PRINT_DEBUG_CNAME;
|
PRINT_DEBUG_CNAME;
|
||||||
|
|
||||||
trans = -1;
|
|
||||||
uplo = -1;
|
uplo = -1;
|
||||||
info = 0;
|
info = 0;
|
||||||
|
|
||||||
|
|
|
@ -116,7 +116,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA,
|
||||||
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *a, blasint lda) {
|
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *a, blasint lda) {
|
||||||
|
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
int trans, uplo;
|
int uplo;
|
||||||
blasint info;
|
blasint info;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
int nthreads;
|
int nthreads;
|
||||||
|
@ -124,7 +124,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha,
|
||||||
|
|
||||||
PRINT_DEBUG_CNAME;
|
PRINT_DEBUG_CNAME;
|
||||||
|
|
||||||
trans = -1;
|
|
||||||
uplo = -1;
|
uplo = -1;
|
||||||
info = 0;
|
info = 0;
|
||||||
|
|
||||||
|
|
|
@ -121,7 +121,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT *ALPHA
|
||||||
FLOAT alpha_r = ALPHA[0];
|
FLOAT alpha_r = ALPHA[0];
|
||||||
FLOAT alpha_i = ALPHA[1];
|
FLOAT alpha_i = ALPHA[1];
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
int trans, uplo;
|
int uplo;
|
||||||
blasint info;
|
blasint info;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
int nthreads;
|
int nthreads;
|
||||||
|
@ -129,7 +129,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT *ALPHA
|
||||||
|
|
||||||
PRINT_DEBUG_CNAME;
|
PRINT_DEBUG_CNAME;
|
||||||
|
|
||||||
trans = -1;
|
|
||||||
uplo = -1;
|
uplo = -1;
|
||||||
info = 0;
|
info = 0;
|
||||||
|
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue